diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 5d5b2c4e6..134967511 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -105,7 +105,7 @@ jobs:
       shell: bash
       working-directory: ${{github.workspace}}/build/compiler
       run: |
-        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
+        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-eri2=0 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
         make -j3
         make check
         cd src/bin/test_eri && ./stdtests.pl && cd ../../..
diff --git a/configure.ac b/configure.ac
index df08efb8b..aded39d1f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -341,6 +341,7 @@ esac
 )
 
 LIBINT_HARD_MAX_AM=12
+AC_DEFINE_UNQUOTED(LIBINT_HARD_MAX_AM,$LIBINT_HARD_MAX_AM)
 LIBINT_MAX_AM=4
 AC_ARG_WITH(max-am,
 AS_HELP_STRING([--with-max-am=N],[Support Gaussians of angular momentum up to N. Can specify values for each derivative levels as a list N0,N1,N2...]),
@@ -528,7 +529,7 @@ AS_HELP_STRING([--with-eri3-max-am=N],[Support 3-center ERIs for Gaussians of an
      else
       ERI3_MAX_AM=$withval
      fi
-     if test $ERI3_MAX_AM -ge $LIBINT_HARD_MAX_AM; then
+     if test $ERI3_MAX_AM -gt $LIBINT_HARD_MAX_AM; then
       AC_MSG_ERROR([Value for --with-eri3-max-am too high ($withval). Are you sure you know what you are doing?])
      fi
      AC_SUBST(ERI3_MAX_AM)
@@ -599,7 +600,7 @@ AS_HELP_STRING([--with-eri2-max-am=N],[Support 2-center ERIs for Gaussians of an
      else
       ERI2_MAX_AM=$withval
      fi
-     if test $ERI2_MAX_AM -ge $LIBINT_HARD_MAX_AM; then
+     if test $ERI2_MAX_AM -gt $LIBINT_HARD_MAX_AM; then
       AC_MSG_ERROR([Value for --with-eri2-max-am too high ($withval). Are you sure you know what you are doing?])
      fi
      AC_SUBST(ERI2_MAX_AM)
diff --git a/export/cmake/CMakeLists.txt.export b/export/cmake/CMakeLists.txt.export
index 8c2b78a43..e27a9c821 100644
--- a/export/cmake/CMakeLists.txt.export
+++ b/export/cmake/CMakeLists.txt.export
@@ -429,6 +429,7 @@ if (LIBINT_HAS_CXX_API)
             tests/unit/test-c-api.cc
             tests/unit/test-core.cc
             tests/unit/test-core-ints.cc
+            tests/unit/test-df-autogen.cc
             tests/unit/test-permute.cc
             tests/unit/test-precision.cc
             tests/unit/test-shell-order.cc
diff --git a/include/libint2/config.h.in b/include/libint2/config.h.in
index bd42b26e2..8c807f5f1 100644
--- a/include/libint2/config.h.in
+++ b/include/libint2/config.h.in
@@ -47,6 +47,9 @@
 /* Prefix for all names in API */
 #undef LIBINT_API_PREFIX
 
+/* Max AM supported by Libint *in principle* */
+#undef LIBINT_HARD_MAX_AM
+
 /* Max AM (same for all derivatives; if not defined see LIBINT_MAX_AM_LIST) */
 #undef LIBINT_MAX_AM
 
diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
new file mode 100644
index 000000000..750f54b69
--- /dev/null
+++ b/include/libint2/dfbs_generator.h
@@ -0,0 +1,321 @@
+/*
+ *  Copyright (C) 2004-2024 Edward F. Valeev
+ *
+ *  This file is part of Libint.
+ *
+ *  Libint is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Libint is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _libint2_src_lib_libint_dfbs_generator_h_
+#define _libint2_src_lib_libint_dfbs_generator_h_
+
+#include <libint2.h>
+#include <libint2/atom.h>
+#include <libint2/basis.h>
+#include <libint2/boys.h>
+#include <libint2/pivoted_cholesky.h>
+
+#include <Eigen/Dense>
+#include <Eigen/Eigenvalues>
+#include <algorithm>
+#include <cmath>
+
+namespace libint2 {
+
+namespace detail {
+
+/// @brief Uncontracts a set of shells
+/// @param[in] shells a vector of shells to be uncontracted
+/// @return a vector of uncontracted shells
+inline std::vector<Shell> uncontract(const std::vector<Shell> &shells) {
+  std::vector<Shell> primitive_shells;
+  for (const auto &contracted_shell : shells) {
+    for (size_t p = 0; p < contracted_shell.nprim(); p++) {
+      const auto prim_shell = contracted_shell.extract_primitive(p, true);
+      // if dealing with generally contracted basis (e.g., cc-pvxz) represented
+      // as a segmented basis need to remove duplicates
+      if (std::find(primitive_shells.begin(), primitive_shells.end(),
+                    prim_shell) == primitive_shells.end())
+        primitive_shells.emplace_back(std::move(prim_shell));
+    }
+  }
+  return primitive_shells;
+}
+
+/// @brief returns \f$ \Gamma(x) \f$
+inline double gamma_function(const double x) { return std::tgamma(x); }
+
+/// @brief Computes an effective exponent for a product of two primitive
+/// gaussians as as described in J. Chem. Theory Comput. 2021, 17, 6886−6900.
+/// \f$ \alpha_{eff} = \left[ \frac{\Gamma(L+2)\Gamma(l_\mu +l_\nu +
+/// \frac{3}{2})}{\Gamma(L+ \frac{3}{2})\Gamma(l_\mu +l_\nu + 2)} \right]^2
+/// (\alpha_\mu + \alpha_\nu) \f$
+/// @param shell1 first primitive shell
+/// @param shell2 second primitive shell
+/// @param L total angular momentum of product function
+/// @return effective exponent of product function
+inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int L) {
+  const auto alpha1 = shell1.alpha[0];
+  const auto alpha2 = shell2.alpha[0];
+  const auto l1 = shell1.contr[0].l;
+  const auto l2 = shell2.contr[0].l;
+  const auto prefactor =
+      std::pow((gamma_function(L + 2.) * gamma_function(l1 + l2 + 1.5)) /
+                   (gamma_function(l1 + l2 + 2.) * gamma_function(L + 1.5)),
+               2.);
+  return prefactor * (alpha1 + alpha2);
+}
+
+/// @brief Creates a set of product functions from a set of primitive shells.
+/// Each pair of primitive shells produces a set of product functions with an
+/// effective exponent (\f$ \alpha_{eff} \f$ as described above) and angular
+/// momentum ranging from \f$ |l1-l2| \leq L \leq l1+l2 \f$ as described in J.
+/// Chem. Theory Comput. 2021, 17, 6886−6900.
+/// @param primitive_shells set of primitive shells
+/// @return set of product functions
+inline std::vector<Shell> product_functions(
+    const std::vector<Shell> &primitive_shells) {
+  std::vector<Shell> product_functions;
+  for (size_t i = 0; i < primitive_shells.size(); ++i) {
+    for (size_t j = 0; j <= i; ++j) {
+      const auto li = primitive_shells[i].contr[0].l;
+      const auto lj = primitive_shells[j].contr[0].l;
+      for (auto L = std::abs(li - lj); L <= li + lj; L++) {
+        const auto alpha = libint2::svector<double>(
+            {alpha_eff(primitive_shells[i], primitive_shells[j], L)});
+        libint2::svector<Shell::Contraction> contr_;
+        Shell::Contraction contr1;
+        contr1.l = L;
+        contr1.pure = true;  // libint2 needs solid harmonics for 2c2b integrals
+        contr1.coeff = {1.0};
+        contr_.push_back(contr1);
+        assert(primitive_shells[i].O == primitive_shells[j].O);
+        const auto shell = Shell(alpha, contr_, primitive_shells[i].O);
+        if (std::find(product_functions.begin(), product_functions.end(),
+                      shell) == product_functions.end())
+          product_functions.emplace_back(shell);
+      }
+    }
+  }
+  return product_functions;
+}
+
+/// @brief returns a hash map of shell indices to basis function indices
+inline std::vector<size_t> map_shell_to_basis_function(
+    const std::vector<libint2::Shell> &shells) {
+  std::vector<size_t> result;
+  result.reserve(shells.size());
+
+  size_t n = 0;
+  for (auto &&shell : shells) {
+    result.push_back(n);
+    n += shell.size();
+  }
+
+  return result;
+}
+
+/// @brief Computes the Coulomb matrix \f$ (\mu|rij^{-1}|\nu) \f$  for a set of
+/// shells
+/// @param shells set of shells
+/// @return Coulomb matrix
+inline Eigen::MatrixXd compute_coulomb_matrix(
+    const std::vector<Shell> &shells) {
+  const auto n = nbf(shells);
+  Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
+  using libint2::Engine;
+  const auto oper = libint2::Operator::coulomb;
+  const auto braket = libint2::BraKet::xs_xs;
+  Engine engine(oper, max_nprim(shells), max_l(shells), 0,
+                std::numeric_limits<scalar_type>::epsilon(),
+                libint2::default_params(oper), braket);
+  engine.set(ScreeningMethod::Conservative);
+  const auto shell2bf = map_shell_to_basis_function(shells);
+  const auto &buf = engine.results();
+  for (size_t s1 = 0; s1 != shells.size(); ++s1) {
+    auto bf1 = shell2bf[s1];
+    auto n1 = shells[s1].size();
+    for (size_t s2 = 0; s2 <= s1; ++s2) {
+      auto bf2 = shell2bf[s2];
+      auto n2 = shells[s2].size();
+      engine.compute(shells[s1], shells[s2]);
+      Eigen::Map<const Eigen::MatrixXd> buf_mat(buf[0], n1, n2);
+      result.block(bf1, bf2, n1, n2) = buf_mat;
+      if (s1 != s2) result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
+    }
+  }
+  return result;
+}
+
+/// @brief Splits a set of shells by angular momentum
+/// @param shells set of shells
+/// @return vector of vectors of shells split by angular momentum L. The i-th
+///  vector contains all shells with angular momentum i
+inline std::vector<std::vector<Shell>> split_by_L(
+    const std::vector<Shell> &shells) {
+  int lmax = max_l(shells);
+  std::vector<std::vector<Shell>> sorted_shells;
+  sorted_shells.resize(lmax + 1);
+  for (auto &&shell : shells) {
+    auto l = shell.contr[0].l;
+    sorted_shells[l].push_back(shell);
+  }
+  return sorted_shells;
+}
+
+/// @brief Computes the reduced set of product functions via pivoted Cholesky
+/// decomposition as described in J. Chem. Theory Comput. 2021, 17, 6886−6900.
+/// Cholesky decomposition is performed on the Coulomb matrix of the product
+/// functions and the pivot indices are sorted in ascending order of column sums
+/// of the Coulomb matrix. The reduced set of product functions is then
+/// constructed by selecting the product functions corresponding to the pivot
+/// indices.
+/// @param shells set of shells
+/// @param cholesky_threshold threshold for choosing a product function via
+/// pivoted Cholesky decomposition
+/// @return reduced set of product functions
+inline std::vector<Shell> pivoted_cholesky_in_L(
+    const std::vector<Shell> &shells, const double cholesky_threshold) {
+  const auto n = shells.size();  // number of shells
+  std::vector<size_t>
+      shell_indices;  // hash map of basis function indices to shell indices
+  const auto L = shells[0].contr[0].l;  // all shells must have same L
+  const auto nbf = libint2::nbf(
+      shells);  // total number of basis functions in vector of shells
+  for (size_t i = 0; i < n; ++i) {
+    for (size_t j = 0; j < 2 * L + 1;
+         ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b
+      // integrals
+      shell_indices.push_back(i);
+  }
+  assert(shell_indices.size() == nbf);
+  const auto C = compute_coulomb_matrix(shells);
+  std::vector<size_t> pivot(nbf);
+  for (auto i = 0; i < nbf; ++i) {
+    pivot[i] = i;
+  }
+  // set pivot indices in ascending order of off diagonal elements of Coulomb
+  // matrix see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly
+  // repulsive interatomic potentials)
+  Eigen::MatrixXd C_off_diag = C;
+  auto col_sum = C_off_diag.colwise().sum();
+  // sort pivot indices in ascending order of column sums
+  std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) {
+    return col_sum[i1] < col_sum[i2];
+  });
+  // compute Cholesky decomposition
+  const auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
+
+  std::vector<Shell> reduced_shells;
+  for (size_t i = 0; i < reduced_pivots.size(); ++i) {
+    // check if the reduced shell is already in reduced shells
+    if (std::find(reduced_shells.begin(), reduced_shells.end(),
+                  shells[shell_indices[reduced_pivots[i]]]) ==
+        reduced_shells.end())
+      reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]);
+  }
+  return reduced_shells;
+}
+}  // namespace detail
+
+/// @brief This class produces density fitting basis sets for an atom from
+/// products of AO basis functions and eliminates linearly dependent functions
+/// via pivoted Cholesky decomposition see: J. Chem. Theory Comput. 2021, 17,
+/// 6886−6900 (Straightforward and Accurate Automatic Auxiliary Basis Set
+/// Generation for Molecular Calculations with Atomic Orbital Basis Sets)
+class DFBasisSetGenerator {
+ public:
+  /// @brief constructor for DFBasisSetGenerator class, generates density
+  /// fitting basis set from products of AO basis functions of and atom see: J.
+  /// Chem. Theory Comput. 2021, 17, 6886−6900 (Straightforward and Accurate
+  /// Automatic Auxiliary Basis Set Generation for Molecular Calculations with
+  /// Atomic Orbital Basis Sets)
+  /// @param obs_name name of basis set for AO functions
+  /// @param atoms vector of atoms
+  /// @param cholesky_threshold threshold for threshold for  pivoted Cholesky
+  /// decomposition to be performed on the Coulomb matrix of the product
+  /// functions
+  DFBasisSetGenerator(std::string obs_name, const Atom &atom,
+                      const double cholesky_threshold = 1e-7) {
+    // get AO basis shells for each atom
+    const auto atom_bs = BasisSet(obs_name, {atom});
+    const auto obs_shells = atom_bs.shells();
+    // get primitive shells from AO functions
+    const auto primitive_shells = detail::uncontract(obs_shells);
+    // compute candidate shells
+    candidate_shells_ = detail::product_functions(primitive_shells);
+    cholesky_threshold_ = cholesky_threshold;
+  }
+
+  /// @brief constructor for DFBasisSetGenerator class, generates density
+  /// fitting basis set from products of AO shells provided by user
+  /// @param shells vector of vector of shells for each atom
+  /// @param cholesky_threshold threshold for  pivoted Cholesky decomposition to
+  /// be performed on the Coulomb matrix of the product functions
+  DFBasisSetGenerator(const std::vector<Shell> &shells,
+                      const double cholesky_threshold = 1e-7) {
+    const auto primitive_shells = detail::uncontract(shells);
+    candidate_shells_ = detail::product_functions(primitive_shells);
+    cholesky_threshold_ = cholesky_threshold;
+  }
+
+  DFBasisSetGenerator() = default;
+
+  ~DFBasisSetGenerator() = default;
+
+  /// @brief returns the candidate shells (full set of product functions)
+  std::vector<Shell> candidate_shells() { return candidate_shells_; }
+
+  /// @brief returns a set of shells reduced via pivoted Cholesky
+  /// decomposition of the Coulomb matrix of the product functions for each
+  /// angular momentum L as described in J. Chem. Theory Comput. 2021, 17,
+  /// 6886−6900.
+  std::vector<Shell> reduced_shells() {
+    if (reduced_shells_computed_)
+      return reduced_shells_;
+    else {
+      const auto candidate_splitted_in_L =
+          detail::split_by_L(candidate_shells_);
+      for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
+        std::vector<Shell> reduced_shells_L;
+        if (candidate_splitted_in_L[i].size() > 1)
+          reduced_shells_L = detail::pivoted_cholesky_in_L(
+              candidate_splitted_in_L[i], cholesky_threshold_);
+        else
+          reduced_shells_L = candidate_splitted_in_L[i];
+        reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(),
+                               reduced_shells_L.end());
+      }
+      reduced_shells_computed_ = true;
+    }
+    return reduced_shells_;
+  }
+
+  /// @brief returns a BasisSet of shells reduced via pivoted Cholesky
+  /// decomposition of the Coulomb matrix of the product functions for each
+  /// angular momentum L as described in J. Chem. Theory Comput. 2021, 17,
+  /// 6886−6900.
+  const BasisSet reduced_basis() { return BasisSet(reduced_shells()); }
+
+ private:
+  double cholesky_threshold_;
+  std::vector<Shell> candidate_shells_;  // full set of product functions
+  std::vector<Shell> reduced_shells_;    // reduced set of product functions
+  bool reduced_shells_computed_ = false;
+};
+
+}  // namespace libint2
+
+#endif /* _libint2_src_lib_libint_dfbs_generator_h_ */
diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h
new file mode 100644
index 000000000..db88d60f9
--- /dev/null
+++ b/include/libint2/pivoted_cholesky.h
@@ -0,0 +1,134 @@
+/*
+ *  Copyright (C) 2004-2021 Edward F. Valeev
+ *
+ *  This file is part of Libint.
+ *
+ *  Libint is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Libint is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _libint2_src_lib_libint_pivoted_cholesky_h_
+#define _libint2_src_lib_libint_pivoted_cholesky_h_
+
+#include <Eigen/Dense>
+#include <algorithm>
+#include <iostream>
+
+namespace libint2 {
+
+/// @brief computes the pivoted Cholesky decomposition of a symmetric positive
+/// definite matrix
+/// @param A symmetric positive definite matrix
+/// @param tolerance tolerance for the error
+/// @param pivot initial pivot indices
+/// @return pivoted Cholesky decomposition of A
+inline std::vector<size_t> pivoted_cholesky(const Eigen::MatrixXd &A,
+                                            const double tolerance,
+                                            const std::vector<size_t> &pivot) {
+  // number of elements in A
+  const auto n = A.rows();
+  // diagonal elements of A
+  std::vector<double> d(n);
+  // initial error
+  auto error = A.diagonal()[0];
+  for (size_t i = 0; i < n; ++i) {
+    d[i] = A.diagonal()[i];
+    error = std::max(d[i], error);
+  }
+
+  // Return matrix
+  Eigen::MatrixXd L(n, n);
+
+  // loop index
+  size_t m = 0;
+
+  // copy input pivot indices
+  std::vector<size_t> piv;
+  piv.reserve(n);
+  for (size_t i = 0; i < n; ++i) {
+    piv.push_back(pivot[i]);
+  }
+
+  while (error > tolerance && m < n) {
+    // update pivot indices
+    // Errors in pivoted order
+    std::vector<double> err(d.size());
+    for (size_t i = 0; i < d.size(); ++i) {
+      err[i] = d[piv[i]];
+    }
+    // error vector after mth element
+    std::vector<double> err2(err.begin() + m, err.end());
+    std::vector<size_t> idx(err2.size());
+    for (size_t i = 0; i < idx.size(); ++i) {
+      idx[i] = i;
+    }
+    // sort indices
+    std::sort(idx.begin(), idx.end(),
+              [&err2](size_t i1, size_t i2) { return err2[i1] > err2[i2]; });
+    // subvector of piv
+    std::vector<size_t> piv_subvec(piv.size() - m);
+    for (size_t i = 0; i < piv_subvec.size(); ++i) {
+      piv_subvec[i] = piv[i + m];
+    }
+    // sort piv
+    for (size_t i = 0; i < idx.size(); ++i) {
+      piv[i + m] = piv_subvec[idx[i]];
+    }
+
+    // TODO: find a better way to update pivot indices
+
+    // current pivot index
+    size_t pim = piv[m];
+    // compute diagonal element
+    L(m, pim) = std::sqrt(d[pim]);
+
+    // off-diagonal elements
+    for (size_t i = m + 1; i < n; ++i) {
+      auto pii = piv[i];
+      // compute element
+      L(m, pii) =
+          (m > 0) ? (A(pim, pii) - L.col(pim).head(m).dot(L.col(pii).head(m))) /
+                        L(m, pim)
+                  : (A(pim, pii)) / L(m, pim);
+      // update d
+      d[pii] -= L(m, pii) * L(m, pii);
+    }
+    // update error
+    if (m + 1 < n) {
+      error = d[piv[m + 1]];
+      for (size_t i = m + 1; i < n; ++i) {
+        error = std::max(d[piv[i]], error);
+      }
+    }
+    // increase m
+    m++;
+  }
+  // Transpose to get Cholesky vectors as columns
+  L.transposeInPlace();
+  // Drop unnecessary columns
+  L.conservativeResize(n, m);
+
+  // return reduced pivot indices
+  std::vector<size_t> reduced_piv;
+  reduced_piv.reserve(m);
+  for (size_t i = 0; i < m; ++i) {
+    reduced_piv.push_back(piv[i]);
+  }
+
+  return reduced_piv;
+}
+
+}  // namespace libint2
+
+#endif  //_libint2_src_lib_libint_pivoted_cholesky_h_
diff --git a/include/libint2/shell.h b/include/libint2/shell.h
index 44c705b82..33422a143 100644
--- a/include/libint2/shell.h
+++ b/include/libint2/shell.h
@@ -33,12 +33,14 @@
 #include <cassert>
 #include <cmath>
 #include <iostream>
+#include <type_traits>
 #include <vector>
 
 namespace libint2 {
 
 namespace math {
-/// fac[k] = k!
+
+/// @brief fac[k] = k!, `0<=k<=20`
 static constexpr std::array<int64_t, 21> fac = {{1LL,
                                                  1LL,
                                                  2LL,
@@ -60,8 +62,224 @@ static constexpr std::array<int64_t, 21> fac = {{1LL,
                                                  6402373705728000LL,
                                                  121645100408832000LL,
                                                  2432902008176640000LL}};
-/// df_Kminus1[k] = (k-1)!!
-static constexpr std::array<int64_t, 31> df_Kminus1 = {{1LL,
+
+/// @brief fac_ld[k] = static_cast<long double>(k!), `0<=k<=170`
+/// @note This is an inexact representation for most values, so should only be
+/// used when `fac` does not
+///       suffice, i.e. for `k>=21`.
+/// @note This should be sufficient for computing with `double` reals
+static constexpr std::array<long double, 171> fac_ld = {
+    {1.l,
+     1.l,
+     2.l,
+     6.l,
+     24.l,
+     120.l,
+     720.l,
+     5040.l,
+     40320.l,
+     362880.l,
+     3.6288e6l,
+     3.99168e7l,
+     4.790016e8l,
+     6.2270208e9l,
+     8.71782912e10l,
+     1.307674368e12l,
+     2.0922789888e13l,
+     3.55687428096e14l,
+     6.402373705728e15l,
+     1.21645100408832e17l,
+     2.43290200817664e18l,
+     5.109094217170944e19l,
+     1.12400072777760768e21l,
+     2.585201673888497664e22l,
+     6.2044840173323943936e23l,
+     1.5511210043330985984e25l,
+     4.03291461126605635584e26l,
+     1.0888869450418352160768e28l,
+     3.04888344611713860501504e29l,
+     8.841761993739701954543616e30l,
+     2.6525285981219105863630848e32l,
+     8.22283865417792281772556288e33l,
+     2.6313083693369353016721801216e35l,
+     8.68331761881188649551819440128e36l,
+     2.9523279903960414084761860964352e38l,
+     1.03331479663861449296666513375232e40l,
+     3.719933267899012174679994481508352e41l,
+     1.37637530912263450463159795815809024e43l,
+     5.23022617466601111760007224100074291e44l,
+     2.03978820811974433586402817399028974e46l,
+     8.15915283247897734345611269596115894e47l,
+     3.34525266131638071081700620534407517e49l,
+     1.40500611775287989854314260624451157e51l,
+     6.04152630633738356373551320685139975e52l,
+     2.65827157478844876804362581101461589e54l,
+     1.19622220865480194561963161495657715e56l,
+     5.50262215981208894985030542880025489e57l,
+     2.5862324151116818064296435515361198e59l,
+     1.2413915592536072670862289047373375e61l,
+     6.08281864034267560872252163321295377e62l,
+     3.04140932017133780436126081660647688e64l,
+     1.55111875328738228022424301646930321e66l,
+     8.0658175170943878571660636856403767e67l,
+     4.27488328406002556429801375338939965e69l,
+     2.30843697339241380472092742683027581e71l,
+     1.2696403353658275925965100847566517e73l,
+     7.1099858780486345185404564746372495e74l,
+     4.05269195048772167556806019054323221e76l,
+     2.35056133128287857182947491051507468e78l,
+     1.38683118545689835737939019720389406e80l,
+     8.32098711274139014427634118322336438e81l,
+     5.07580213877224798800856812176625227e83l,
+     3.14699732603879375256531223549507641e85l,
+     1.98260831540444006411614670836189814e87l,
+     1.26886932185884164103433389335161481e89l,
+     8.24765059208247066672317030678549625e90l,
+     5.44344939077443064003729240247842753e92l,
+     3.64711109181886852882498590966054644e94l,
+     2.48003554243683059960099041856917158e96l,
+     1.71122452428141311372468338881272839e98l,
+     1.19785716699698917960727837216890987e100l,
+     8.5047858856786231752116764423992601e101l,
+     6.12344583768860868615240703852746727e103l,
+     4.47011546151268434089125713812505111e105l,
+     3.30788544151938641225953028221253782e107l,
+     2.48091408113953980919464771165940337e109l,
+     1.88549470166605025498793226086114656e111l,
+     1.45183092028285869634070784086308285e113l,
+     1.13242811782062978314575211587320462e115l,
+     8.94618213078297528685144171539831652e116l,
+     7.15694570462638022948115337231865322e118l,
+     5.79712602074736798587973423157810911e120l,
+     4.75364333701284174842138206989404947e122l,
+     3.94552396972065865118974711801206106e124l,
+     3.31424013456535326699938757913013129e126l,
+     2.81710411438055027694947944226061159e128l,
+     2.42270953836727323817655232034412597e130l,
+     2.1077572983795277172136005186993896e132l,
+     1.85482642257398439114796845645546284e134l,
+     1.65079551609084610812169192624536193e136l,
+     1.48571596448176149730952273362082574e138l,
+     1.35200152767840296255166568759495142e140l,
+     1.24384140546413072554753243258735531e142l,
+     1.15677250708164157475920516230624044e144l,
+     1.08736615665674308027365285256786601e146l,
+     1.03299784882390592625997020993947271e148l,
+     9.91677934870949689209571401541893801e149l,
+     9.61927596824821198533284259495636987e151l,
+     9.42689044888324774562618574305724247e153l,
+     9.33262154439441526816992388562667005e155l,
+     9.33262154439441526816992388562667005e157l,
+     9.42594775983835942085162312448293675e159l,
+     9.61446671503512660926865558697259548e161l,
+     9.90290071648618040754671525458177335e163l,
+     1.02990167451456276238485838647650443e166l,
+     1.08139675824029090050410130580032965e168l,
+     1.14628056373470835453434738414834943e170l,
+     1.22652020319613793935175170103873389e172l,
+     1.3246418194518289744998918371218326e174l,
+     1.44385958320249358220488210246279753e176l,
+     1.58824554152274294042537031270907729e178l,
+     1.76295255109024466387216104710707579e180l,
+     1.97450685722107402353682037275992488e182l,
+     2.23119274865981364659660702121871512e184l,
+     2.54355973347218755712013200418933523e186l,
+     2.92509369349301569068815180481773552e188l,
+     3.3931086844518982011982560935885732e190l,
+     3.96993716080872089540195962949863065e192l,
+     4.68452584975429065657431236280838416e194l,
+     5.57458576120760588132343171174197716e196l,
+     6.68950291344912705758811805409037259e198l,
+     8.09429852527344373968162284544935083e200l,
+     9.87504420083360136241157987144820801e202l,
+     1.21463043670253296757662432418812959e205l,
+     1.50614174151114087979501416199328069e207l,
+     1.88267717688892609974376770249160086e209l,
+     2.37217324288004688567714730513941708e211l,
+     3.01266001845765954480997707752705969e213l,
+     3.85620482362580421735677065923463641e215l,
+     4.97450422247728744039023415041268096e217l,
+     6.46685548922047367250730439553648525e219l,
+     8.47158069087882051098456875815279568e221l,
+     1.11824865119600430744996307607616903e224l,
+     1.48727070609068572890845089118130481e226l,
+     1.99294274616151887673732419418294845e228l,
+     2.6904727073180504835953876621469804e230l,
+     3.65904288195254865768972722051989335e232l,
+     5.01288874827499166103492629211225388e234l,
+     6.91778647261948849222819828311491036e236l,
+     9.6157231969410890041971956135297254e238l,
+     1.34620124757175246058760738589416156e241l,
+     1.89814375907617096942852641411076779e243l,
+     2.69536413788816277658850750803729027e245l,
+     3.85437071718007277052156573649332508e247l,
+     5.55029383273930478955105466055038812e249l,
+     8.04792605747199194484902925779806277e251l,
+     1.17499720439091082394795827163851716e254l,
+     1.72724589045463891120349865930862023e256l,
+     2.55632391787286558858117801577675794e258l,
+     3.80892263763056972698595524350736934e260l,
+     5.713383956445854590478932865261054e262l,
+     8.62720977423324043162318862654419154e264l,
+     1.31133588568345254560672467123471711e267l,
+     2.00634390509568239477828874698911719e269l,
+     3.08976961384735088795856467036324047e271l,
+     4.78914290146339387633577523906302272e273l,
+     7.47106292628289444708380937293831545e275l,
+     1.17295687942641442819215807155131553e278l,
+     1.85327186949373479654360975305107853e280l,
+     2.94670227249503832650433950735121486e282l,
+     4.71472363599206132240694321176194378e284l,
+     7.59070505394721872907517857093672949e286l,
+     1.22969421873944943411017892849175018e289l,
+     2.00440157654530257759959165344155279e291l,
+     3.28721858553429622726333031164414657e293l,
+     5.42391066613158877498449501421284184e295l,
+     9.00369170577843736647426172359331746e297l,
+     1.50361651486499904020120170784008402e300l,
+     2.52607574497319838753801886917134115e302l,
+     4.26906800900470527493925188889956654e304l,
+     7.25741561530799896739672821112926311e306l}};
+
+/// @brief computes factorial as an integer
+
+/// @param k non-negative integer
+/// @pre `k<fac.size()`
+/// @return factorial of \p k
+template <typename Int, typename = typename std::enable_if<
+                            std::is_integral<Int>::value>::type>
+constexpr int64_t fac_int(Int k) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || k >= 0);
+  assert(k < fac.size());
+#endif
+  return fac[k];
+}
+
+/// @brief computes factorial as a real number
+
+/// @param k non-negative integer
+/// @pre `k<fac_ld.size()`
+/// @return factorial of \p k , uses exact representation when possible
+template <
+    typename Real, typename Int,
+    typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+constexpr Real fac_real(Int k) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || k >= 0);
+  assert(k < fac.size() || k < fac_ld.size());
+  // for extended-precision types should do better ...
+  assert(k < fac.size() || (std::is_same<Real, double>::value ||
+                            std::is_same<Real, long double>::value));
+#endif
+
+  // use exact representation when available
+  return (k < fac.size()) ? static_cast<Real>(fac[k])
+                          : static_cast<Real>(fac_ld[k]);
+}
+
+/// `df_Kminus1[k] = (k-1)!!`, `0<=k<=34`
+static constexpr std::array<int64_t, 35> df_Kminus1 = {{1LL,
                                                         1LL,
                                                         1LL,
                                                         2LL,
@@ -91,15 +309,391 @@ static constexpr std::array<int64_t, 31> df_Kminus1 = {{1LL,
                                                         51011754393600LL,
                                                         213458046676875LL,
                                                         1428329123020800LL,
-                                                        6190283353629375LL}};
-/// bc(i,j) = binomial coefficient, i! / (j! (i-j)!)
-template <typename Int>
-int64_t bc(Int i, Int j) {
-  assert(i < Int(fac.size()));
-  assert(j < Int(fac.size()));
+                                                        6190283353629375LL,
+                                                        42849873690624000LL,
+                                                        191898783962510625LL,
+                                                        1371195958099968000LL,
+                                                        6332659870762850625LL}};
+
+/// @brief fac_ld[k] = static_cast<long double>(k!), `0<=k<=170`
+/// @note This is an inexact representation for most values, so should only be
+/// used when `fac` does not
+///       suffice, i.e. for `k>=21`.
+/// @note This should be sufficient for computing with `double` reals
+static constexpr std::array<long double, 302> df_Kminus1_ld = {
+    {1.l,
+     1.l,
+     1.l,
+     2.l,
+     3.l,
+     8.l,
+     15.l,
+     48.l,
+     105.l,
+     384.l,
+     945.l,
+     3840.l,
+     10395.l,
+     46080.l,
+     135135.l,
+     645120.l,
+     2.027025e6l,
+     1.032192e7l,
+     3.4459425e7l,
+     1.8579456e8l,
+     6.54729075e8l,
+     3.7158912e9l,
+     1.3749310575e10l,
+     8.17496064e10l,
+     3.16234143225e11l,
+     1.9619905536e12l,
+     7.905853580625e12l,
+     5.10117543936e13l,
+     2.13458046676875e14l,
+     1.4283291230208e15l,
+     6.190283353629375e15l,
+     4.2849873690624e16l,
+     1.91898783962510625e17l,
+     1.371195958099968e18l,
+     6.332659870762850625e18l,
+     4.6620662575398912e19l,
+     2.21643095476699771875e20l,
+     1.678343852714360832e21l,
+     8.200794532637891559375e21l,
+     6.3777066403145711616e22l,
+     3.19830986772877770815625e23l,
+     2.55108265612582846464e24l,
+     1.3113070457687988603440625e25l,
+     1.0714547155728479551488e26l,
+     5.63862029680583509947946875e26l,
+     4.71440074852053100265472e27l,
+     2.5373791335626257947657609375e28l,
+     2.1686243443194442612211712e29l,
+     1.192568192774434123539907640625e30l,
+     1.040939685273333245386162176e31l,
+     5.8435841445947272053455474390625e31l,
+     5.20469842636666622693081088e32l,
+     2.980227913743310874726229193921875e33l,
+     2.7064431817106664380040216576e34l,
+     1.57952079428395476360490147277859375e35l,
+     1.461479318123759876522171695104e36l,
+     8.68736436856175119982695810028226562e36l,
+     8.1842841814930553085241614925824e37l,
+     4.95179769008019818390136611716089141e38l,
+     4.746884825265972078944013665697792e39l,
+     2.92156063714731692850180600912492593e40l,
+     2.8481308951595832473664081994186752e41l,
+     1.78215198865986332638610166556620482e42l,
+     1.76584115499894161336717308363957862e43l,
+     1.12275575285571389562324404930670903e44l,
+     1.13013833919932263255499077352933032e45l,
+     7.29791239356214032155108632049360873e45l,
+     7.45891303871552937486293910529358011e46l,
+     4.88960130368663401543922783473071785e47l,
+     5.07206086632655997490679859159963447e48l,
+     3.37382489954377747065306720596419531e49l,
+     3.55044260642859198243475901411974413e50l,
+     2.39541567867608200416367771623457867e51l,
+     2.55631867662858622735302649016621577e52l,
+     1.74865344543353986303948473285124243e53l,
+     1.89167582070515380824123960272299967e54l,
+     1.31149008407515489727961354963843182e55l,
+     1.43767362373591689426334209806947975e56l,
+     1.0098473647378692709053024332215925e57l,
+     1.12138542651401517752540683649419421e58l,
+     7.97779418142916724015188922245058078e58l,
+     8.97108341211212142020325469195355365e59l,
+     6.46201328695762546452303027018497043e60l,
+     7.35628839793193956456666884740191399e61l,
+     5.36347102817482913555411512425352546e62l,
+     6.17928225426282923423600183181760775e63l,
+     4.55895037394860476522099785561549664e64l,
+     5.31418273866603314144296157536314267e65l,
+     3.96628682533528614574226813438548208e66l,
+     4.67648081002610916446980618631956555e67l,
+     3.52999527454840466971061863960307905e68l,
+     4.20883272902349824802282556768760899e69l,
+     3.21229569983904824943666296203880193e70l,
+     3.87212611070161838818099952227260027e71l,
+     2.9874350008503148719760965546960858e72l,
+     3.63979854405952128489013955093624426e73l,
+     2.83806325080779912837729172696128151e74l,
+     3.49420660229714043349453396889879449e75l,
+     2.75292135328356515452597297515244306e76l,
+     3.4243224702511976248246432895208186e77l,
+     2.72539213975072950298071324540091863e78l,
+     3.4243224702511976248246432895208186e79l,
+     2.75264606114823679801052037785492782e80l,
+     3.49280891965622157732113615531123497e81l,
+     2.83522544298268390195083598919057565e82l,
+     3.63252127644247044041398160152368437e83l,
+     2.97698671513181809704837778865010444e84l,
+     3.85047255302901866683882049761510543e85l,
+     3.18537578519104536384176423385561175e86l,
+     4.15851035727134016018592613742431386e87l,
+     3.4720596058582394465875230149026168e88l,
+     4.57436139299847417620451875116674525e89l,
+     3.85398616250264578571215054654190465e90l,
+     5.12328476015829107734906100130675468e91l,
+     4.35500436362798973785473011759235226e92l,
+     5.84054462658045182817792954148970034e93l,
+     5.0082550181721881985329396352312051e94l,
+     6.77503176683332412068639826812805239e95l,
+     5.85965837126146019228353937322050996e96l,
+     7.99453748486332246240994995639110182e97l,
+     6.97299346180113762881741185413240686e98l,
+     9.59344498183598695489193994766932219e99l,
+     8.4373220887793765308690683435002123e100l,
+     1.17040028778399040849681667361565731e102l,
+     1.03779061691986331329689540625052611e103l,
+     1.45129635685214810653605267528341506e104l,
+     1.29723827114982914162111925781315764e105l,
+     1.82863340963370661423542637085710298e106l,
+     1.6474926043602830098588214574227102e107l,
+     2.34065076433114446622134575469709181e108l,
+     2.12526545962476508271787968007529616e109l,
+     3.04284599363048780608774948110621935e110l,
+     2.78409775210844225836042238089863797e111l,
+     4.01655671159224390403582931506020954e112l,
+     3.7028500103042282036193617665951885e113l,
+     5.38218599353360683140801128218068079e114l,
+     4.99884751391070807488613838490350448e115l,
+     7.31977295120570529071489534376572587e116l,
+     6.84842109405767006259400958731780114e117l,
+     1.01012866726638733011865555743967017e119l,
+     9.51930532074016138700567332637174358e119l,
+     1.41418013417294226216611778041553824e121l,
+     1.34222205022436275556779993901841585e122l,
+     2.0081357905255780122758872481900643e123l,
+     1.91937753182083874046195391279633466e124l,
+     2.89171553835683233767727763739369259e125l,
+     2.78309742114021617366983317355468525e126l,
+     4.22190468600097521300882535059479118e127l,
+     4.09115320907611777529465476512538732e128l,
+     6.24841893528144331525306151888029095e129l,
+     6.09581828152341548518903560003682711e130l,
+     9.37262840292216497287959227832043642e131l,
+     9.20468560510035738263544375605560894e132l,
+     1.42463951724416907587769802630470634e134l,
+     1.40831689758035467954322289467650817e135l,
+     2.19394485655602037685165496050924776e136l,
+     2.18289119124954975329199548674858766e137l,
+     3.4225539762273917878885817383944265e138l,
+     3.42713917026179311266843291419528263e139l,
+     5.40763528243927902486395914666319387e140l,
+     5.44915128071625104914280833357049938e141l,
+     8.6522164519028464397823346346611102e142l,
+     8.773133561953164189119921417048504e143l,
+     1.40165906520826112324473821081509985e145l,
+     1.43002077059836576282654719097890615e146l,
+     2.29872086694154824212137066573676376e147l,
+     2.35953427148730350866380286511519515e148l,
+     3.81587663912297008192147530512302784e149l,
+     3.9404222333837968594685507847423759e150l,
+     6.41067275372658973762807851260668677e151l,
+     6.65931357441861669250185082621461527e152l,
+     1.08981436813352025539677334714313675e154l,
+     1.13874262122558345441781649128269921e155l,
+     1.87448071318965483928245015708619521e156l,
+     1.97002473472025937614282252991906964e157l,
+     3.26159644094999942035146327332997967e158l,
+     3.44754328576045390824993942735837186e159l,
+     5.74040973607199897981857536106076421e160l,
+     6.1021516157960034176023927864243182e161l,
+     1.02179293302081581840770641426881603e163l,
+     1.09228513922748461175082830876995296e164l,
+     1.83922727943746847313387154568386885e165l,
+     1.97703610200174714726899923887361485e166l,
+     3.34739364857619262110364621314464131e167l,
+     3.61797606666319727950226860713871518e168l,
+     6.15920431338019442283070903218614002e169l,
+     6.69325572332691496707919692320662308e170l,
+     1.14561200228871616264651187998662204e172l,
+     1.25163882026213309884380982463963852e173l,
+     2.15375056430278638577544233437484944e174l,
+     2.3655973702954315568148005685689168e175l,
+     4.09212607217529413297334043531221394e176l,
+     4.51829097726427427351626908596663108e177l,
+     7.85688205857656473530881363579945076e178l,
+     8.72030158612004934788639933591559799e179l,
+     1.52423511936385355864990984534509345e181l,
+     1.70045880929340962283784787050354161e182l,
+     2.98750083395315297495382329687638316e183l,
+     3.34990385430801695699056030489197697e184l,
+     5.91525165122724289040857012781523865e185l,
+     6.66630867007295374441121500673503416e186l,
+     1.18305033024544857808171402556304773e188l,
+     1.33992804268466370262665421635374187e189l,
+     2.38976166709580612772506233163735642e190l,
+     2.72005392664986731633210805919809599e191l,
+     4.87511380087544450055912715654020709e192l,
+     5.57611054963222799848082152135609678e193l,
+     1.00427344298034156711518019424728266e195l,
+     1.15425488377387119568553005492071203e196l,
+     2.08888876139911045959957480403434793e197l,
+     2.41239270708739079898275781478428815e198l,
+     4.38666639893813196515910708847213066e199l,
+     5.090148611954394585853618989194848e200l,
+     9.299732765748839766137307027560917e201l,
+     1.08420165434628604678682084469850262e203l,
+     1.99014281187025170995338370389803624e204l,
+     2.33103355684451500059166481610178064e205l,
+     4.29870847363974369349930880041975827e206l,
+     5.05834281835259755128391265094086399e207l,
+     9.37118447253464125182849318491507304e208l,
+     1.10777707721921886373117687055604921e210l,
+     2.06166058395762107540226850068131607e211l,
+     2.44818734065447368884590088392886876e212l,
+     4.57688649638591878739303607151252167e213l,
+     5.45945776965947632612635897116137734e214l,
+     1.02522257519044580837604008001880485e216l,
+     1.2283779981733821733784307685113099e217l,
+     2.31700301993040752692985058084249897e218l,
+     2.78841805585357753356903784452067348e219l,
+     5.28276688544132916140005932432089765e220l,
+     6.38547734790469255187309666395234226e221l,
+     1.21503638365150570712201364459380646e223l,
+     1.47504526736598397948268532937299106e224l,
+     2.81888441007149324052307165545763099e225l,
+     3.43685547296274267219465681743906917e226l,
+     6.59618951956729418282398767377085651e227l,
+     8.07661036146244527965744352098181256e228l,
+     1.55670072661788142714646109100992214e230l,
+     1.91415665566659953127881411447268958e231l,
+     3.70494772935055779660857739660361469e232l,
+     4.57483440704317287975636573358972809e233l,
+     8.89187455044133871186058575184867524e234l,
+     1.10253509209740466402128414179512447e236l,
+     2.15183364120680396827026175194737941e237l,
+     2.67916027379669333357172046456215246e238l,
+     5.25047408454460168257943867475160576e239l,
+     6.56394267080189866725071513817727353e240l,
+     1.29161662479797201391454191398889502e242l,
+     1.62129383968806897081092663912978656e243l,
+     3.20320922949897059450806394669245964e244l,
+     4.03702166082329173731920733143316854e245l,
+     8.0080230737474264862701598667311491e246l,
+     1.0132924368666462260671210401897253e248l,
+     2.01802181458435147454008028641624957e249l,
+     2.56362986527261495194981623168000502e250l,
+     5.12577540904425274533180392749727392e251l,
+     6.53725615644516812747203139078401279e252l,
+     1.31219850471532870280494180543930212e254l,
+     1.68007483220640820876031206743149129e255l,
+     3.38547214216554805323674985803339948e256l,
+     4.35139381541459726068920825464756243e257l,
+     8.80222756963042493841554963088683864e258l,
+     1.1357137858232098850398833544630138e260l,
+     2.30618362324317133386487400329235172e261l,
+     2.98692725671504199765489322223772628e262l,
+     6.08832476536197232140326736869180855e263l,
+     7.91535723029486129378546703892997465e264l,
+     1.61949438758628463749326912007202107e266l,
+     2.11340038048872796544071969939430323e267l,
+     4.34024495873124282848196124179301648e268l,
+     5.68504702351467822703553599137067569e269l,
+     1.17186613885743556369012953528411445e271l,
+     1.54064774337247779952663025366145311e272l,
+     3.1874758976922247332371523359727913e273l,
+     4.205968339406864392707700592495767e274l,
+     8.73368395967669576906979740056544817e275l,
+     1.15664129333688770799461766293633592e277l,
+     2.41049677287076803226326408255606369e278l,
+     3.20389638254317895114509092633365051e279l,
+     6.70118102858073512969187414950585707e280l,
+     8.93887090729546927369480368447088492e281l,
+     1.87633068800260583631372476186163998e283l,
+     2.51182272495002686590823983533631866e284l,
+     5.29125254016734845840470382844982474e285l,
+     7.10845831160857603052031873400178182e286l,
+     1.50271572140752696218693588727975023e288l,
+     2.02591061880844416869829083919050782e289l,
+     4.29776696322552711185463663762008565e290l,
+     5.81436347598023476416409470847675744e291l,
+     1.23775688540895180821413535163458467e293l,
+     1.6803510445582878468434233707497829e294l,
+     3.58949496768596024382099251974029553e295l,
+     4.88982153966461763431436200888186824e296l,
+     1.0481325305643003911957298157641663e298l,
+     1.43271771112173296685410806860238739e299l,
+     3.08150963985904315011544565834664891e300l,
+     4.22651724780911225221961880237704281e301l,
+     9.12126853398276772434171914870608078e302l,
+     1.25527562259930633890922678430598171e304l,
+     2.71813802312686478185383230631441207e305l,
+     3.75327411157192595333858808507488533e306l,
+     8.15441406938059434556149691894323621e307l}};
+
+/// @brief computes double factorial as an integer
+
+/// @param k non-negative integer
+/// @pre `k<df_Kminus1.size()`
+/// @return double factorial of `k-1`; N.B.: `-1!! = 1`
+template <typename Int, typename = typename std::enable_if<
+                            std::is_integral<Int>::value>::type>
+constexpr int64_t df_Kminus1_int(Int k) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || k >= 0);
+  assert(k < df_Kminus1.size());
+#endif
+  return df_Kminus1[k];
+}
+
+/// @brief computes double factorial as a real number
+
+/// @param k non-negative integer
+/// @pre `k<df_Kminus1.size()`
+/// @return double factorial of `k-1` (note that `-1!!=1`), uses exact
+/// representation when possible
+template <
+    typename Real, typename Int,
+    typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+constexpr Real df_Kminus1_real(Int k) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || k >= 0);
+  assert(k < df_Kminus1.size() || k < df_Kminus1_ld.size());
+  // for extended-precision types should do better ...
+  assert(k < df_Kminus1.size() || (std::is_same<Real, double>::value ||
+                                   std::is_same<Real, long double>::value));
+#endif
+  // use exact representation when available
+  return (k < df_Kminus1.size()) ? static_cast<Real>(df_Kminus1[k])
+                                 : static_cast<Real>(df_Kminus1_ld[k]);
+}
+
+/// @brief computes binomial coefficient as an integer
+
+/// @param i non-negative integer
+/// @param j non-negative integer
+/// @pre `i>=j`
+/// return `i! / (j! * (i-j)!)`
+template <typename Int, typename = typename std::enable_if<
+                            std::is_integral<Int>::value>::type>
+constexpr int64_t bc_int(Int i, Int j) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
   assert(i >= j);
-  return fac[i] / (fac[j] * fac[i - j]);
+#endif
+  return fac_int(i) / (fac_int(j) * fac_int(i - j));
 }
+
+/// @brief computes binomial coefficient as a real number
+
+/// @param i non-negative integer
+/// @param j non-negative integer
+/// @pre `i>=j`
+/// @return binomial coefficient, (\p i , \p j), uses exact representation when
+/// possible
+template <
+    typename Real, typename Int,
+    typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+constexpr Real bc_real(Int i, Int j) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || (i >= 0 && j >= 0));
+  assert(i >= j);
+#endif
+  return fac_real<Real>(i) / (fac_real<Real>(j) * fac_real<Real>(i - j));
+}
+
 }  // namespace math
 
 /// generally-contracted Solid-Harmonic/Cartesion Gaussian Shell
@@ -319,15 +913,14 @@ struct Shell {
     const auto alpha = this->alpha.at(p);
     assert(alpha >= 0.0);
     const auto l = contr.at(c).l;
-    assert(l <= 15);  // due to df_Kminus1[] a 64-bit integer type; kinda
-                      // ridiculous restriction anyway
 
-    using libint2::math::df_Kminus1;
+    using libint2::math::df_Kminus1_real;
     const auto sqrt_Pi_cubed = real_t{5.56832799683170784528481798212};
     const auto two_alpha = 2 * alpha;
     const auto two_alpha_to_am32 = pow(two_alpha, l + 1) * sqrt(two_alpha);
-    const auto one_over_N = sqrt((sqrt_Pi_cubed * df_Kminus1[2 * l]) /
-                                 (pow(2, l) * two_alpha_to_am32));
+    const auto one_over_N =
+        sqrt((sqrt_Pi_cubed * df_Kminus1_real<real_t>(2 * l)) /
+             (pow(2, l) * two_alpha_to_am32));
     return contr.at(c).coeff[p] * one_over_N;
   }
 
@@ -360,13 +953,11 @@ struct Shell {
   /// before computing integrals. \warning Must be done only once. \note this is
   /// now private
   void renorm() {
-    using libint2::math::df_Kminus1;
+    using libint2::math::df_Kminus1_real;
     using std::pow;
     const auto sqrt_Pi_cubed = real_t{5.56832799683170784528481798212};
     const auto np = nprim();
     for (auto& c : contr) {
-      assert(c.l <= 15);  // due to df_Kminus1[] a 64-bit integer type; kinda
-                          // ridiculous restriction anyway
       for (auto p = 0ul; p != np; ++p) {
         assert(alpha[p] >= 0);
         if (alpha[p] != 0) {
@@ -375,7 +966,7 @@ struct Shell {
               pow(two_alpha, c.l + 1) * sqrt(two_alpha);
           const auto normalization_factor =
               sqrt(pow(2, c.l) * two_alpha_to_am32 /
-                   (sqrt_Pi_cubed * df_Kminus1[2 * c.l]));
+                   (sqrt_Pi_cubed * df_Kminus1_real<real_t>(2 * c.l)));
 
           c.coeff[p] *= normalization_factor;
         }
@@ -389,8 +980,8 @@ struct Shell {
         for (auto p = 0ul; p != np; ++p) {
           for (decltype(p) q = 0ul; q <= p; ++q) {
             auto gamma = alpha[p] + alpha[q];
-            norm += (p == q ? 1 : 2) * df_Kminus1[2 * c.l] * sqrt_Pi_cubed *
-                    c.coeff[p] * c.coeff[q] /
+            norm += (p == q ? 1 : 2) * df_Kminus1_real<real_t>(2 * c.l) *
+                    sqrt_Pi_cubed * c.coeff[p] * c.coeff[q] /
                     (pow(2, c.l) * pow(gamma, c.l + 1) * sqrt(gamma));
           }
         }
@@ -621,7 +1212,7 @@ struct ShellPair {
                        std::abs(P[2] - B[2])),
               max_l2);
           const auto fac_l1_fac_l2_oogamma_to_l =
-              math::fac[max_l1] * math::fac[max_l2] *
+              math::fac_real<Real>(max_l1) * math::fac_real<Real>(max_l2) *
               std::pow(oogamma, max_l1 + max_l2);
           nonspherical_scr_factor =
               std::max(maxabs_PA_i_to_l1 * maxabs_PB_i_to_l2,
diff --git a/include/libint2/soad_fock.h b/include/libint2/soad_fock.h
new file mode 100644
index 000000000..f3796e539
--- /dev/null
+++ b/include/libint2/soad_fock.h
@@ -0,0 +1,183 @@
+/*
+ *  Copyright (C) 2004-2024 Edward F. Valeev
+ *
+ *  This file is part of Libint.
+ *
+ *  Libint is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Libint is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _libint2_src_lib_libint_soad_fock_h_
+#define _libint2_src_lib_libint_soad_fock_h_
+
+#include <libint2/atom.h>
+#include <libint2/basis.h>
+#include <libint2/chemistry/sto3g_atomic_density.h>
+
+#include <Eigen/Dense>
+
+namespace libint2 {
+
+/// @brief computes Superposition-Of-Atomic-Densities guess for the molecular
+/// 1-RDM
+
+/// The atomic densities are represented by the STO-3G AOs;
+/// occupies subshells by smearing electrons (of neutral ground-state atoms)
+/// evenly over the subshell components
+/// @param[in] atoms list of atoms
+/// @return the 1-RDM in the STO-3G AO basis
+/// @return the 1-RDM, normalized to the # of electrons/2
+Eigen::MatrixXd compute_soad(const std::vector<Atom> &atoms) {
+  // compute number of atomic orbitals
+  size_t nao = 0;
+  for (const auto &atom : atoms) {
+    const auto Z = atom.atomic_number;
+    nao += libint2::sto3g_num_ao(Z);
+  }
+
+  // compute the minimal basis density
+  Eigen::MatrixXd D = Eigen::MatrixXd::Zero(nao, nao);
+  size_t ao_offset = 0;  // first AO of this atom
+  for (const auto &atom : atoms) {
+    const auto Z = atom.atomic_number;
+    const auto &occvec = libint2::sto3g_ao_occupation_vector(Z);
+    for (const auto &occ : occvec) {
+      D(ao_offset, ao_offset) = occ;
+      ++ao_offset;
+    }
+  }
+
+  return D * 0.5;  // we use densities normalized to # of electrons/2
+}
+
+Eigen::MatrixXd compute_2body_fock_general(const BasisSet &obs,
+                                           const Eigen::MatrixXd &D,
+                                           const BasisSet &D_bs,
+                                           bool D_is_shelldiagonal,
+                                           double precision) {
+  const auto n = obs.nbf();
+  const auto nshells = obs.size();
+  const auto n_D = D_bs.nbf();
+  assert(D.cols() == D.rows() && D.cols() == n_D);
+
+  Eigen::MatrixXd G = Eigen::MatrixXd ::Zero(n, n);
+
+  // construct the 2-electron repulsion integrals engine
+  libint2::Engine engine(libint2::Operator::coulomb,
+                         std::max(obs.max_nprim(), D_bs.max_nprim()),
+                         std::max(obs.max_l(), D_bs.max_l()), 0);
+  engine.set_precision(precision);
+  auto shell2bf = obs.shell2bf();
+  auto shell2bf_D = D_bs.shell2bf();
+
+  const auto &buf = engine.results();
+
+  // loop over permutationally-unique set of shells
+  for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
+    auto bf1_first = shell2bf[s1];  // first basis function in this shell
+    auto n1 = obs[s1].size();       // number of basis functions in this shell
+
+    for (auto s2 = 0; s2 <= s1; ++s2) {
+      auto bf2_first = shell2bf[s2];
+      auto n2 = obs[s2].size();
+
+      for (auto s3 = 0; s3 < D_bs.size(); ++s3) {
+        auto bf3_first = shell2bf_D[s3];
+        auto n3 = D_bs[s3].size();
+
+        auto s4_begin = D_is_shelldiagonal ? s3 : 0;
+        auto s4_fence = D_is_shelldiagonal ? s3 + 1 : D_bs.size();
+
+        for (auto s4 = s4_begin; s4 != s4_fence; ++s4, ++s1234) {
+          auto bf4_first = shell2bf_D[s4];
+          auto n4 = D_bs[s4].size();
+
+          // compute the permutational degeneracy (i.e. # of equivalents) of
+          // the given shell set
+          auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
+
+          if (s3 >= s4) {
+            auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
+            auto s1234_deg = s12_deg * s34_deg;
+            // auto s1234_deg = s12_deg;
+            engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
+                obs[s1], obs[s2], D_bs[s3], D_bs[s4]);
+            const auto *buf_1234 = buf[0];
+            if (buf_1234 != nullptr) {
+              for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
+                const auto bf1 = f1 + bf1_first;
+                for (auto f2 = 0; f2 != n2; ++f2) {
+                  const auto bf2 = f2 + bf2_first;
+                  for (auto f3 = 0; f3 != n3; ++f3) {
+                    const auto bf3 = f3 + bf3_first;
+                    for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
+                      const auto bf4 = f4 + bf4_first;
+
+                      const auto value = buf_1234[f1234];
+                      const auto value_scal_by_deg = value * s1234_deg;
+                      G(bf1, bf2) += 2.0 * D(bf3, bf4) * value_scal_by_deg;
+                    }
+                  }
+                }
+              }
+            }
+          }
+
+          engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
+              obs[s1], D_bs[s3], obs[s2], D_bs[s4]);
+          const auto *buf_1324 = buf[0];
+          if (buf_1324 == nullptr)
+            continue;  // if all integrals screened out, skip to next quartet
+
+          for (auto f1 = 0, f1324 = 0; f1 != n1; ++f1) {
+            const auto bf1 = f1 + bf1_first;
+            for (auto f3 = 0; f3 != n3; ++f3) {
+              const auto bf3 = f3 + bf3_first;
+              for (auto f2 = 0; f2 != n2; ++f2) {
+                const auto bf2 = f2 + bf2_first;
+                for (auto f4 = 0; f4 != n4; ++f4, ++f1324) {
+                  const auto bf4 = f4 + bf4_first;
+
+                  const auto value = buf_1324[f1324];
+                  const auto value_scal_by_deg = value * s12_deg;
+                  G(bf1, bf2) -= D(bf3, bf4) * value_scal_by_deg;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // symmetrize the result and return
+  return 0.5 * (G + G.transpose());
+}
+
+Eigen::MatrixXd compute_soad_fock(const BasisSet &obs, const BasisSet &minbs,
+                                  const std::vector<Atom> &atoms) {
+  // use SOAD as guess for the density matrix
+  auto D = compute_soad(atoms);
+  // compute fock with guess density
+  auto F = compute_2body_fock_general(
+      obs, D, minbs, true /* SOAD_D_is_shelldiagonal */,
+      std::numeric_limits<double>::epsilon()  // this is cheap, no reason
+                                              // to be cheaper
+  );
+  return F;
+}
+
+}  // namespace libint2
+
+#endif  //_libint2_src_lib_libint_soad_fock_h_
diff --git a/include/libint2/solidharmonics.h b/include/libint2/solidharmonics.h
index f97dc7632..ddf06443e 100644
--- a/include/libint2/solidharmonics.h
+++ b/include/libint2/solidharmonics.h
@@ -83,21 +83,25 @@ class SolidHarmonicsCoefficients {
   static const SolidHarmonicsCoefficients& instance(unsigned int l) {
     static std::vector<SolidHarmonicsCoefficients> shg_coefs(
         SolidHarmonicsCoefficients::CtorHelperIter(0),
-        SolidHarmonicsCoefficients::CtorHelperIter(11));
-    assert(l <= 10);  // see coeff() for explanation of the upper limit on l
+        SolidHarmonicsCoefficients::CtorHelperIter(LIBINT_HARD_MAX_AM + 1));
+    assert(l < shg_coefs.size());
     return shg_coefs[l];
   }
 
+  static SolidHarmonicsCoefficients make_instance(unsigned int l) {
+    return SolidHarmonicsCoefficients(l);
+  }
+
   /// returns ptr to row values
   const Real* row_values(size_t r) const {
     return &values_[0] + row_offset_[r];
   }
   /// returns ptr to row indices
-  const unsigned char* row_idx(size_t r) const {
+  const unsigned int* row_idx(size_t r) const {
     return &colidx_[0] + row_offset_[r];
   }
   /// number of nonzero elements in row \c r
-  unsigned char nnz(size_t r) const {
+  unsigned int nnz(size_t r) const {
     return row_offset_[r + 1] - row_offset_[r];
   }
 
@@ -108,9 +112,9 @@ class SolidHarmonicsCoefficients {
    harmonic Ylm is requested.
    ---------------------------------------------------------------------------------------------*/
   static Real coeff(int l, int m, int lx, int ly, int lz) {
-    using libint2::math::bc;
-    using libint2::math::df_Kminus1;
-    using libint2::math::fac;
+    using libint2::math::bc_real;
+    using libint2::math::df_Kminus1_real;
+    using libint2::math::fac_real;
 
     auto abs_m = std::abs(m);
     if ((lx + ly - abs_m) % 2) return 0.0;
@@ -127,13 +131,17 @@ class SolidHarmonicsCoefficients {
     auto i = abs_m - lx;
     if (comp != parity(abs(i))) return 0.0;
 
-    assert(l <= 10);  // libint2::math::fac[] is only defined up to 20
-    Real pfac =
-        sqrt(((Real(fac[2 * lx]) * Real(fac[2 * ly]) * Real(fac[2 * lz])) /
-              fac[2 * l]) *
-             ((Real(fac[l - abs_m])) / (fac[l])) * (Real(1) / fac[l + abs_m]) *
-             (Real(1) / (fac[lx] * fac[ly] * fac[lz])));
-    /*  pfac = sqrt(fac[l-abs_m]/(fac[l]*fac[l]*fac[l+abs_m]));*/
+    Real pfac;
+    pfac = sqrt(((fac_real<Real>(2 * lx) * fac_real<Real>(2 * ly) *
+                  fac_real<Real>(2 * lz)) /
+                 fac_real<Real>(2 * l)) *
+                ((fac_real<Real>(l - abs_m)) / (fac_real<Real>(l))) *
+                (Real(1) / fac_real<Real>(l + abs_m)) *
+                (Real(1) / (fac_real<Real>(lx) * fac_real<Real>(ly) *
+                            fac_real<Real>(lz))));
+
+    /*  pfac =
+     * sqrt(fac_real<Real>(l-abs_m)/(fac_real<Real>(l)*fac_real<Real>(l)*fac[l+abs_m]));*/
     pfac /= (1L << l);
     if (m < 0)
       pfac *= parity((i - 1) / 2);
@@ -144,19 +152,22 @@ class SolidHarmonicsCoefficients {
     auto i_max = (l - abs_m) / 2;
     Real sum = 0;
     for (auto i = i_min; i <= i_max; i++) {
-      Real pfac1 = bc(l, i) * bc(i, j);
-      pfac1 *= (Real(parity(i) * fac[2 * (l - i)]) / fac[l - abs_m - 2 * i]);
+      Real pfac1 = bc_real<Real>(l, i) * bc_real<Real>(i, j);
+      pfac1 *= (Real(parity(i) * fac_real<Real>(2 * (l - i))) /
+                fac_real<Real>(l - abs_m - 2 * i));
       Real sum1 = 0.0;
       const int k_min = std::max((lx - abs_m) / 2, 0);
       const int k_max = std::min(j, lx / 2);
       for (int k = k_min; k <= k_max; k++) {
         if (lx - 2 * k <= abs_m)
-          sum1 += bc(j, k) * bc(abs_m, lx - 2 * k) * parity(k);
+          sum1 += bc_real<Real>(j, k) * bc_real<Real>(abs_m, lx - 2 * k) *
+                  parity(k);
       }
       sum += pfac1 * sum1;
     }
-    sum *= sqrt(Real(df_Kminus1[2 * l]) /
-                (df_Kminus1[2 * lx] * df_Kminus1[2 * ly] * df_Kminus1[2 * lz]));
+    sum *= sqrt(df_Kminus1_real<Real>(2 * l) /
+                (df_Kminus1_real<Real>(2 * lx) * df_Kminus1_real<Real>(2 * ly) *
+                 df_Kminus1_real<Real>(2 * lz)));
 
     Real result = (m == 0) ? pfac * sum : M_SQRT2 * pfac * sum;
     return result;
@@ -164,25 +175,24 @@ class SolidHarmonicsCoefficients {
 
  private:
   std::vector<Real> values_;  // elements
-  std::vector<unsigned short>
-      row_offset_;                     // "pointer" to the beginning of each row
-  std::vector<unsigned char> colidx_;  // column indices
-  signed char l_;                      // the angular momentum quantum number
+  std::vector<unsigned int>
+      row_offset_;                    // "pointer" to the beginning of each row
+  std::vector<unsigned int> colidx_;  // column indices
+  int l_;                             // the angular momentum quantum number
 
   void init() {
-    const unsigned short npure = 2 * l_ + 1;
-    const unsigned short ncart = (l_ + 1) * (l_ + 2) / 2;
+    const unsigned int npure = 2 * l_ + 1;
+    const unsigned int ncart = (l_ + 1) * (l_ + 2) / 2;
     std::vector<Real> full_coeff(npure * ncart);
 
     std::vector<int> shg_indices;
     if (libint2::solid_harmonics_ordering() ==
         libint2::SHGShellOrdering_Standard) {
-      for (signed char pure_idx = 0, m = -l_; pure_idx != npure;
-           ++pure_idx, ++m)
+      for (signed int pure_idx = 0, m = -l_; pure_idx != npure; ++pure_idx, ++m)
         shg_indices.push_back(m);
     } else if (libint2::solid_harmonics_ordering() ==
                libint2::SHGShellOrdering_Gaussian) {
-      for (signed char pure_idx = 0, m = 0; pure_idx != npure;
+      for (signed int pure_idx = 0, m = 0; pure_idx != npure;
            ++pure_idx, m = (m > 0 ? -m : 1 - m))
         shg_indices.push_back(m);
     } else {
@@ -190,10 +200,10 @@ class SolidHarmonicsCoefficients {
           "libint2::solid_harmonics_ordering() value not recognized."));
     }
 
-    for (signed char pure_idx = 0; pure_idx != npure; ++pure_idx) {
+    for (signed int pure_idx = 0; pure_idx != npure; ++pure_idx) {
       int m = shg_indices[pure_idx];
-      signed char cart_idx = 0;
-      signed char lx, ly, lz;
+      int cart_idx = 0;
+      int lx, ly, lz;
       FOR_CART(lx, ly, lz, l_)
       full_coeff[pure_idx * ncart + cart_idx] = coeff(l_, m, lx, ly, lz);
       // std::cout << "Solid(" << (int)l_ << "," << (int)m << ") += Cartesian("
@@ -214,11 +224,11 @@ class SolidHarmonicsCoefficients {
     row_offset_.resize(npure + 1);
     // 3) copy
     {
-      unsigned short pc = 0;
-      unsigned short cnt = 0;
-      for (unsigned short p = 0; p != npure; ++p) {
+      unsigned int pc = 0;
+      unsigned int cnt = 0;
+      for (unsigned int p = 0; p != npure; ++p) {
         row_offset_[p] = cnt;
-        for (unsigned short c = 0; c != ncart; ++c, ++pc) {
+        for (unsigned int c = 0; c != ncart; ++c, ++pc) {
           if (full_coeff[pc] != 0.0) {
             values_[cnt] = full_coeff[pc];
             colidx_[cnt] = c;
diff --git a/include/libint2/util/intrinsic_types.h b/include/libint2/util/intrinsic_types.h
index fc874d394..8f333f1d8 100644
--- a/include/libint2/util/intrinsic_types.h
+++ b/include/libint2/util/intrinsic_types.h
@@ -22,12 +22,20 @@
 #define _libint2_include_libint2intrinsictypes_h_
 
 #include <libint2/config.h>
+#ifdef __cplusplus
+#include <climits>
+#else
 #include <limits.h>
+#endif
 
 /* determine default LIBINT2 64-bit integer */
 #ifdef HAVE_STDINT_H
 
+#ifdef __cplusplus
+#include <cstdint>
+#else
 #include <stdint.h>
+#endif
 /* because mpz_class does not mesh with long long types, only use those when
  * absolutely necessary */
 #if UINT_LEAST64_MAX != ULONG_MAX
diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index d2881183b..8d208898d 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -44,12 +44,14 @@
 #endif  // LIBINT2_HAVE_BTAS
 
 // Libint Gaussian integrals library
-#include <libint2/chemistry/sto3g_atomic_density.h>
+#include <libint2/dfbs_generator.h>
 #include <libint2/diis.h>
 #include <libint2/lcao/molden.h>
+#include <libint2/soad_fock.h>
 #include <libint2/util/intpart_iter.h>
 
 #include <libint2.hpp>
+
 #if !LIBINT2_CONSTEXPR_STATICS
 #include <libint2/statics_definition.h>
 #endif
@@ -76,6 +78,10 @@ typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
 typedef Eigen::DiagonalMatrix<double, Eigen::Dynamic, Eigen::Dynamic>
     DiagonalMatrix;
 
+#ifdef LIBINT2_HAVE_BTAS
+typedef btas::Tensor<double> tensor;
+#endif
+
 using libint2::Atom;
 using libint2::BasisSet;
 using libint2::BraKet;
@@ -166,10 +172,8 @@ struct DFFockEngine {
   const BasisSet& dfbs;
   DFFockEngine(const BasisSet& _obs, const BasisSet& _dfbs)
       : obs(_obs), dfbs(_dfbs) {}
-
-  typedef btas::RangeNd<CblasRowMajor, std::array<long, 3>> Range3d;
-  typedef btas::Tensor<double, Range3d> Tensor3d;
-  Tensor3d xyK;
+  typedef btas::Tensor<double> Tensor;
+  Tensor xyK;
 
   // a DF-based builder, using coefficients of occupied MOs
   Matrix compute_2body_fock_dfC(const Matrix& Cocc);
@@ -217,9 +221,18 @@ int main(int argc, char* argv[]) {
     const auto filename = (argc > 1) ? argv[1] : "h2o.xyz";
     const auto basisname = (argc > 2) ? argv[2] : "aug-cc-pVDZ";
     bool do_density_fitting = false;
+    double cholesky_threshold = 1e-4;
 #ifdef HAVE_DENSITY_FITTING
     do_density_fitting = (argc > 3);
-    const auto dfbasisname = do_density_fitting ? argv[3] : "";
+    const std::string dfbasisname = do_density_fitting ? argv[3] : "";
+
+    // if autoDF use e.g. -> autodf(1e-6) as command line argument
+    if (dfbasisname.rfind("autodf", 0) == 0) {
+      cholesky_threshold =
+          std::stod(dfbasisname.substr(7, dfbasisname.length() - 8));
+      std::cout << "New Cholesky threshold for generating df basis set = "
+                << cholesky_threshold << std::endl;
+    }
 #endif
     std::vector<Atom> atoms = read_geometry(filename);
 
@@ -275,10 +288,24 @@ int main(int argc, char* argv[]) {
     BasisSet obs(basisname, atoms);
     cout << "orbital basis set rank = " << obs.nbf() << endl;
 
+    // initializes the Libint integrals library ... now ready to compute
+    libint2::initialize();
+
 #ifdef HAVE_DENSITY_FITTING
     BasisSet dfbs;
     if (do_density_fitting) {
-      dfbs = BasisSet(dfbasisname, atoms);
+      if (dfbasisname.rfind("autodf", 0) == 0) {
+        std::vector<Shell> dfbs_shells;
+        for (auto&& atom : atoms) {
+          libint2::DFBasisSetGenerator dfbs_generator(basisname, atom,
+                                                      cholesky_threshold);
+          auto reduced_shells = dfbs_generator.reduced_shells();
+          dfbs_shells.insert(dfbs_shells.end(), reduced_shells.begin(),
+                             reduced_shells.end());
+        }
+        dfbs = BasisSet(std::move(dfbs_shells));
+      } else
+        dfbs = BasisSet(dfbasisname, atoms);
       cout << "density-fitting basis set rank = " << dfbs.nbf() << endl;
     }
 #endif  // HAVE_DENSITY_FITTING
@@ -287,13 +314,6 @@ int main(int argc, char* argv[]) {
     /*** compute 1-e integrals       ***/
     /*** =========================== ***/
 
-    // initializes the Libint integrals library ... now ready to compute
-    libint2::set_solid_harmonics_ordering(libint2::SHGShellOrdering_Gaussian);
-    libint2::initialize();
-    printf("Configuration G: sho=%d components=%s\n",
-           libint2::solid_harmonics_ordering(),
-           libint2::configuration_accessor().c_str());
-
     // compute OBS non-negligible shell-pair list
     {
       const auto tstart = std::chrono::high_resolution_clock::now();
@@ -343,7 +363,8 @@ int main(int argc, char* argv[]) {
     {  // use SOAD as the guess density
       const auto tstart = std::chrono::high_resolution_clock::now();
 
-      auto D_minbs = compute_soad(atoms);  // compute guess in minimal basis
+      auto D_minbs =
+          libint2::compute_soad(atoms);  // compute guess in minimal basis
       BasisSet minbs("STO-3G", atoms);
       if (minbs == obs)
         D = D_minbs;
@@ -689,7 +710,9 @@ int main(int argc, char* argv[]) {
     }
 
     {  // compute hessian
-      LIBINT_MAYBE_UNUSED const auto ncoords = 3 * atoms.size();
+      const auto ncoords = 3 * atoms.size();
+      // # of elems in upper triangle
+      const auto nelem = ncoords * (ncoords + 1) / 2;
 #if LIBINT2_DERIV_ONEBODY_ORDER > 1
       // compute 1-e hessian
       Matrix H1 = Matrix::Zero(ncoords, ncoords);
@@ -721,7 +744,7 @@ int main(int argc, char* argv[]) {
       }
 
       std::cout << "** 1-body hessian = ";
-      for (auto row = 0; row != ncoords; ++row) {
+      for (auto row = 0, i = 0; row != ncoords; ++row) {
         for (auto col = row; col != ncoords; ++col) {
           std::cout << H1(row, col) << " ";
         }
@@ -729,7 +752,7 @@ int main(int argc, char* argv[]) {
       std::cout << std::endl;
 
       std::cout << "** Pulay hessian = ";
-      for (auto row = 0; row != ncoords; ++row) {
+      for (auto row = 0, i = 0; row != ncoords; ++row) {
         for (auto col = row; col != ncoords; ++col) {
           std::cout << H_Pulay(row, col) << " ";
         }
@@ -754,7 +777,7 @@ int main(int argc, char* argv[]) {
       }
 
       std::cout << "** 2-body hessian = ";
-      for (auto row = 0; row != ncoords; ++row) {
+      for (auto row = 0, i = 0; row != ncoords; ++row) {
         for (auto col = row; col != ncoords; ++col) {
           std::cout << H2(row, col) << " ";
         }
@@ -817,7 +840,7 @@ int main(int argc, char* argv[]) {
       }
 
       std::cout << "** nuclear repulsion hessian = ";
-      for (auto row = 0; row != ncoords; ++row) {
+      for (auto row = 0, i = 0; row != ncoords; ++row) {
         for (auto col = row; col != ncoords; ++col) {
           std::cout << HN(row, col) << " ";
         }
@@ -826,7 +849,7 @@ int main(int argc, char* argv[]) {
 
       auto H = H1 + H_Pulay + H2 + HN;
       std::cout << "** Hartree-Fock hessian = ";
-      for (auto row = 0; row != ncoords; ++row) {
+      for (auto row = 0, i = 0; row != ncoords; ++row) {
         for (auto col = row; col != ncoords; ++col) {
           std::cout << H(row, col) << " ";
         }
@@ -965,7 +988,7 @@ std::array<Matrix, libint2::operator_traits<obtype>::nopers> compute_1body_ints(
     // loop over unique shell pairs, {s1,s2} such that s1 >= s2
     // this is due to the permutational symmetry of the real integrals over
     // Hermitian operators: (1|2) = (2|1)
-    for (auto s1 = 0l; s1 != nshells; ++s1) {
+    for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
       auto bf1 = shell2bf[s1];  // first basis function in this shell
       auto n1 = obs[s1].size();
 
@@ -977,6 +1000,8 @@ std::array<Matrix, libint2::operator_traits<obtype>::nopers> compute_1body_ints(
         auto bf2 = shell2bf[s2];
         auto n2 = obs[s2].size();
 
+        auto n12 = n1 * n2;
+
         // compute shell pair; return is the pointer to the buffer
         engines[thread_id].compute(obs[s1], obs[s2]);
 
@@ -1046,7 +1071,7 @@ std::vector<Matrix> compute_1body_ints_deriv(unsigned deriv_order,
     // loop over unique shell pairs, {s1,s2} such that s1 >= s2
     // this is due to the permutational symmetry of the real integrals over
     // Hermitian operators: (1|2) = (2|1)
-    for (auto s1 = 0l; s1 != nshells; ++s1) {
+    for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
       auto bf1 = shell2bf[s1];  // first basis function in this shell
       auto n1 = obs[s1].size();
       auto atom1 = shell2atom[s1];
@@ -1061,6 +1086,8 @@ std::vector<Matrix> compute_1body_ints_deriv(unsigned deriv_order,
         auto n2 = obs[s2].size();
         auto atom2 = shell2atom[s2];
 
+        auto n12 = n1 * n2;
+
         // compute shell pair; return is the pointer to the buffer
         engines[thread_id].compute(obs[s1], obs[s2]);
 
@@ -1185,7 +1212,7 @@ std::vector<Matrix> compute_1body_ints_deriv(unsigned deriv_order,
             ShellSetDerivIterator shellset_diter(deriv_order,
                                                  nderivcenters_shset);
             while (shellset_diter) {
-              LIBINT_MAYBE_UNUSED const auto& deriv = *shellset_diter;
+              const auto& deriv = *shellset_diter;
             }
           }
         }  // copy shell block switch
@@ -1505,7 +1532,6 @@ Matrix compute_2body_2index_ints(const BasisSet& bs) {
   }
 
   auto shell2bf = bs.shell2bf();
-  auto unitshell = Shell::unit();
 
   auto compute = [&](int thread_id) {
     auto& engine = engines[thread_id];
@@ -1559,6 +1585,7 @@ Matrix compute_2body_fock(const BasisSet& obs, const Matrix& D,
   auto fock_precision = precision;
   // standard approach is to omit *contributions* to the Fock matrix smaller
   // than fock_precision ... this relies on massive amount of error cancellation
+  auto max_nprim = obs.max_nprim();
   auto needed_engine_precision = (fock_precision / D_shblk_norm.maxCoeff());
   assert(needed_engine_precision > max_engine_precision &&
          "using precomputed shell pair data limits the max engine precision"
@@ -1767,6 +1794,7 @@ std::vector<Matrix> compute_2body_fock_deriv(const BasisSet& obs,
   auto fock_precision = precision;
   // standard approach is to omit *contributions* to the Fock matrix smaller
   // than fock_precision ... this relies on massive amount of error cancellation
+  auto max_nprim = obs.max_nprim();
   auto needed_engine_precision = (fock_precision / D_shblk_norm.maxCoeff());
   assert(needed_engine_precision > max_engine_precision &&
          "using precomputed shell pair data limits the max engine precision"
@@ -1939,6 +1967,8 @@ std::vector<Matrix> compute_2body_fock_deriv(const BasisSet& obs,
                   const int xyz = d % 3;
 
                   auto coord = shell_atoms[a] * 3 + xyz;
+                  auto& g = G[thread_id * nderiv + coord];
+
                   int coord1 = 0, coord2 = 0;
 
                   add_shellset_to_dest(thread_id * nderiv + coord, d, coord1,
@@ -2029,7 +2059,7 @@ Matrix compute_2body_fock_general(const BasisSet& obs, const Matrix& D,
                                   double precision) {
   const auto n = obs.nbf();
   const auto nshells = obs.size();
-  LIBINT_MAYBE_UNUSED const auto n_D = D_bs.nbf();
+  const auto n_D = D_bs.nbf();
   assert(D.cols() == D.rows() && D.cols() == n_D);
 
   using libint2::nthreads;
@@ -2158,11 +2188,6 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
   std::vector<libint2::Timers<5>> timers(nthreads);
   for (auto& timer : timers) timer.set_now_overhead(25);
 
-  typedef btas::RangeNd<CblasRowMajor, std::array<long, 1>> Range1d;
-  typedef btas::RangeNd<CblasRowMajor, std::array<long, 2>> Range2d;
-  typedef btas::Tensor<double, Range1d> Tensor1d;
-  typedef btas::Tensor<double, Range2d> Tensor2d;
-
   // using first time? compute 3-center ints and transform to inv sqrt
   // representation
   if (xyK.size() == 0) {
@@ -2187,7 +2212,7 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
     auto shell2bf = obs.shell2bf();
     auto shell2bf_df = dfbs.shell2bf();
 
-    Tensor3d Zxy{ndf, n, n};
+    Tensor Zxy{ndf, n, n};
 
     auto lambda = [&](int thread_id) {
       auto& engine = engines[thread_id];
@@ -2264,12 +2289,12 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
     //  std::cout << "||V^-1 - L^-1^t L^-1|| = " << (V.inverse() - Linv_t *
     //  Linv_t.transpose()).norm() << std::endl;
 
-    Tensor2d K{ndf, ndf};
+    Tensor K{ndf, ndf};
     std::copy(Linv_t.data(), Linv_t.data() + ndf * ndf, K.begin());
 
-    xyK = Tensor3d{n, n, ndf};
+    xyK = Tensor{n, n, ndf};
     btas::contract(1.0, Zxy, {1, 2, 3}, K, {1, 4}, 0.0, xyK, {2, 3, 4});
-    Zxy = Tensor3d{0, 0, 0};  // release memory
+    Zxy = Tensor{0, 0, 0};  // release memory
 
     timers[0].stop(2);
     std::cout << "time for integrals metric tform = " << timers[0].read(2)
@@ -2280,12 +2305,12 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
   timers[0].start(3);
 
   const auto nocc = Cocc.cols();
-  Tensor2d Co{n, nocc};
+  Tensor Co{n, nocc};
   std::copy(Cocc.data(), Cocc.data() + n * nocc, Co.begin());
-  Tensor3d xiK{n, nocc, ndf};
+  Tensor xiK{n, nocc, ndf};
   btas::contract(1.0, xyK, {1, 2, 3}, Co, {2, 4}, 0.0, xiK, {1, 4, 3});
 
-  Tensor2d G{n, n};
+  Tensor G{n, n};
   btas::contract(1.0, xiK, {1, 2, 3}, xiK, {4, 2, 3}, 0.0, G, {1, 4});
 
   timers[0].stop(3);
@@ -2294,9 +2319,9 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
   // compute Coulomb
   timers[0].start(4);
 
-  Tensor1d Jtmp{ndf};
+  Tensor Jtmp{ndf};
   btas::contract(1.0, xiK, {1, 2, 3}, Co, {1, 2}, 0.0, Jtmp, {3});
-  xiK = Tensor3d{0, 0, 0};
+  xiK = Tensor{0, 0, 0};
   btas::contract(2.0, xyK, {1, 2, 3}, Jtmp, {3}, -1.0, G, {1, 2});
 
   timers[0].stop(4);
@@ -2399,11 +2424,13 @@ void api_basic_compile_test(const BasisSet& obs,
         eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
         eri2_engine.compute(obs[s1], obs[s2]);
 
+        auto bf1 = shell2bf[s1];   // first basis function in first shell
         auto n1 = obs[s1].size();  // number of basis functions in first shell
+        auto bf2 = shell2bf[s2];   // first basis function in second shell
         auto n2 = obs[s2].size();  // number of basis functions in second shell
 
-        LIBINT_MAYBE_UNUSED const auto* buf4 = results4[0];
-        LIBINT_MAYBE_UNUSED const auto* buf2 = results2[0];
+        const auto* buf4 = results4[0];
+        const auto* buf2 = results2[0];
 
         // this iterates over integrals in the order they are packed in array
         // ints_shellset
@@ -2427,13 +2454,16 @@ void api_basic_compile_test(const BasisSet& obs,
           eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], obs[s3]);
           eri3_engine.compute(obs[s1], obs[s2], obs[s3]);
 
+          auto bf1 = shell2bf[s1];   // first basis function in first shell
           auto n1 = obs[s1].size();  // number of basis functions in first shell
+          auto bf2 = shell2bf[s2];   // first basis function in second shell
           auto n2 =
-              obs[s2].size();  // number of basis functions in second shell
+              obs[s2].size();       // number of basis functions in second shell
+          auto bf3 = shell2bf[s3];  // first basis function in third shell
           auto n3 = obs[s3].size();  // number of basis functions in third shell
 
-          LIBINT_MAYBE_UNUSED const auto* buf4 = results4[0];
-          LIBINT_MAYBE_UNUSED const auto* buf3 = results3[0];
+          const auto* buf4 = results4[0];
+          const auto* buf3 = results3[0];
 
           // this iterates over integrals in the order they are packed in array
           // ints_shellset
@@ -2460,13 +2490,15 @@ void api_basic_compile_test(const BasisSet& obs,
         eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
         eri2_engine.compute(obs[s1], obs[s2]);
 
+        auto bf1 = shell2bf[s1];   // first basis function in first shell
         auto n1 = obs[s1].size();  // number of basis functions in first shell
+        auto bf2 = shell2bf[s2];   // first basis function in second shell
         auto n2 = obs[s2].size();  // number of basis functions in second shell
 
         // loop over derivative shell sets
         for (auto d = 0; d != 6; ++d) {
-          LIBINT_MAYBE_UNUSED const auto* buf4 = results4[d < 3 ? d : d + 3];
-          LIBINT_MAYBE_UNUSED const auto* buf2 = results2[d];
+          const auto* buf4 = results4[d < 3 ? d : d + 3];
+          const auto* buf2 = results2[d];
 
           // this iterates over integrals in the order they are packed in array
           // ints_shellset
@@ -2493,7 +2525,9 @@ void api_basic_compile_test(const BasisSet& obs,
         eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
         eri2_engine.compute(obs[s1], obs[s2]);
 
+        auto bf1 = shell2bf[s1];   // first basis function in first shell
         auto n1 = obs[s1].size();  // number of basis functions in first shell
+        auto bf2 = shell2bf[s2];   // first basis function in second shell
         auto n2 = obs[s2].size();  // number of basis functions in second shell
 
         // loop over derivative shell sets
@@ -2502,8 +2536,8 @@ void api_basic_compile_test(const BasisSet& obs,
           for (auto d2 = d1; d2 != 6; ++d2, ++d12) {
             const auto dd2 = d2 < 3 ? d2 : d2 + 3;
             const auto dd12 = dd1 * (24 - dd1 - 1) / 2 + dd2;
-            LIBINT_MAYBE_UNUSED const auto* buf4 = results4[dd12];
-            LIBINT_MAYBE_UNUSED const auto* buf2 = results2[d12];
+            const auto* buf4 = results4[dd12];
+            const auto* buf2 = results2[d12];
 
             // this iterates over integrals in the order they are packed in
             // array
diff --git a/tests/unit/Makefile b/tests/unit/Makefile
index bd0257772..6b8fcc213 100644
--- a/tests/unit/Makefile
+++ b/tests/unit/Makefile
@@ -26,7 +26,8 @@ CXXDEPENDFLAGS = -M
 TEST1 = test
 CXXTEST1SRC = test.cc test-core.cc test-permute.cc \
   test-1body.cc test-core-ints.cc test-c-api.cc test-shell-order.cc \
-  test-util.cc c-api-util.cc test-2body.cc test-precision.cc test-basis.cc
+  test-util.cc c-api-util.cc test-2body.cc test-precision.cc test-basis.cc\
+  test-df-autogen.cc
 CXXTEST1OBJ = $(CXXTEST1SRC:%.cc=%.$(OBJSUF))
 CXXTEST1DEP = $(CXXTEST1SRC:%.cc=%.$(DEPSUF))
 CTEST1SRC = c-api.c
diff --git a/tests/unit/test-core.cc b/tests/unit/test-core.cc
index dd7e0d6f0..5b60fc7c2 100644
--- a/tests/unit/test-core.cc
+++ b/tests/unit/test-core.cc
@@ -18,9 +18,24 @@
  *
  */
 
+#include <libint2/solidharmonics.h>
+
 #include "catch.hpp"
 #include "fixture.h"
 
+TEST_CASE("SolidHarmonicsCoefficients ctor", "[shell]") {
+  using libint2::solidharmonics::SolidHarmonicsCoefficients;
+  REQUIRE_NOTHROW(SolidHarmonicsCoefficients<long double>::instance(12));
+  auto sh12 = SolidHarmonicsCoefficients<long double>::instance(12);
+  CHECK_NOTHROW(sh12.coeff(12, 0, 0, 0, 12));
+  CHECK(sh12.coeff(12, 0, 0, 0, 12) == Approx(1));
+  CHECK_NOTHROW(
+      SolidHarmonicsCoefficients<long double>::make_instance(25).coeff(25, 0, 0,
+                                                                       0, 25));
+  CHECK(SolidHarmonicsCoefficients<long double>::make_instance(25).coeff(
+            25, 0, 0, 0, 25) == Approx(1));
+}
+
 TEST_CASE("Shell ctor", "[shell]") {
   REQUIRE_NOTHROW(Shell{});
   auto s0 = Shell{};
diff --git a/tests/unit/test-df-autogen.cc b/tests/unit/test-df-autogen.cc
new file mode 100644
index 000000000..d6021cb17
--- /dev/null
+++ b/tests/unit/test-df-autogen.cc
@@ -0,0 +1,188 @@
+#include <libint2/atom.h>
+#include <libint2/basis.h>
+#include <libint2/dfbs_generator.h>
+#include <libint2/engine.h>
+
+#include <Eigen/Cholesky>
+#include <Eigen/Dense>
+#include <unsupported/Eigen/CXX11/Tensor>
+
+#include "catch.hpp"
+#include "fixture.h"
+
+Eigen::Tensor<double, 3> compute_eri3(const BasisSet &obs,
+                                      const BasisSet &dfbs) {
+  long nshells = obs.size();
+  long nshells_df = dfbs.size();
+  const auto &unitshell = libint2::Shell::unit();
+
+  // construct the 2-electron 3-center repulsion integrals engine
+  // since the code assumes (xx|xs) braket, and Engine/libint only produces
+  // (xs|xx), use 4-center engine
+  libint2::Engine engine(libint2::Operator::coulomb,
+                         std::max(obs.max_nprim(), dfbs.max_nprim()),
+                         std::max(obs.max_l(), dfbs.max_l()), 0);
+  engine.set(BraKet::xs_xx);
+  const auto shell2bf = obs.shell2bf();
+  const auto shell2bf_df = dfbs.shell2bf();
+  long nbf = obs.nbf();
+  long ndf = dfbs.nbf();
+
+  Eigen::Tensor<double, 3> result(ndf, nbf, nbf);
+  const auto &buf = engine.results();
+  for (long s1 = 0; s1 != nshells_df; ++s1) {
+    long bf1 = shell2bf_df[s1];
+    long n1 = dfbs[s1].size();
+    for (long s2 = 0; s2 != nshells; ++s2) {
+      long bf2 = shell2bf[s2];
+      long n2 = obs[s2].size();
+      for (long s3 = 0; s3 != nshells;
+           ++s3) {  // only loop over unique ao shells
+        long bf3 = shell2bf[s3];
+        long n3 = obs[s3].size();
+        engine.compute2<Operator::coulomb, BraKet::xs_xx, 0>(
+            dfbs[s1], unitshell, obs[s2], obs[s3]);
+        long fij = 0;
+        for (long f = 0; f != n1; ++f) {
+          for (long i = 0; i != n2; ++i) {
+            for (long j = 0; j != n3; ++j) {
+              result(bf1 + f, bf2 + i, bf3 + j) = buf[0][fij];
+              ++fij;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return result;
+}
+
+Eigen::MatrixXd compute_eri2(const BasisSet &dfbs) {
+  long ndf = dfbs.nbf();
+  Eigen::MatrixXd result(ndf, ndf);
+  result.fill(0.0);
+  libint2::Engine engine(libint2::Operator::coulomb, dfbs.max_nprim(),
+                         dfbs.max_l(), 0);
+  engine.set(BraKet::xs_xs);
+  engine.set(libint2::ScreeningMethod::Conservative);
+  const auto shell2bf_df = dfbs.shell2bf();
+  const auto &buf = engine.results();
+  for (long s1 = 0; s1 != dfbs.size(); ++s1) {
+    long bf1 = shell2bf_df[s1];
+    long n1 = dfbs[s1].size();
+    for (long s2 = 0; s2 != dfbs.size(); ++s2) {
+      long bf2 = shell2bf_df[s2];
+      long n2 = dfbs[s2].size();
+      engine.compute(dfbs[s1], dfbs[s2]);
+      // "map" buffer to a const Eigen Matrix, and copy it to the corresponding
+      // blocks of the result
+      Eigen::Map<const Eigen::MatrixXd> buf_mat(buf[0], n1, n2);
+      result.block(bf1, bf2, n1, n2) = buf_mat;
+    }
+  }
+  return result;
+}
+
+Eigen::Tensor<double, 4> compute_eri4(const BasisSet &obs) {
+  long nshells = obs.size();
+  libint2::Engine engine(libint2::Operator::coulomb,
+                         std::max(obs.max_nprim(), obs.max_nprim()),
+                         std::max(obs.max_l(), obs.max_l()), 0);
+  engine.set(BraKet::xx_xx);
+  const auto shell2bf = obs.shell2bf();
+  long nbf = obs.nbf();
+
+  Eigen::Tensor<double, 4> result(nbf, nbf, nbf, nbf);
+  const auto &buf = engine.results();
+  for (long s1 = 0; s1 != nshells; ++s1) {
+    long bf1 = shell2bf[s1];
+    long n1 = obs[s1].size();
+    // TODO: loop over all unique quartets: s1 >= s2 >= s3 >= s4
+    for (long s2 = 0; s2 != nshells; ++s2) {
+      long bf2 = shell2bf[s2];
+      long n2 = obs[s2].size();
+      for (long s3 = 0; s3 != nshells; ++s3) {
+        long bf3 = shell2bf[s3];
+        long n3 = obs[s3].size();
+        for (long s4 = 0; s4 != nshells; ++s4) {
+          long bf4 = shell2bf[s4];
+          long n4 = obs[s4].size();
+          engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
+              obs[s1], obs[s2], obs[s3], obs[s4]);
+          long ijkl = 0;
+          for (long i = 0; i != n1; ++i) {
+            for (long j = 0; j != n2; ++j) {
+              for (long k = 0; k != n3; ++k) {
+                for (long l = 0; l != n4; ++l) {
+                  result(bf1 + i, bf2 + j, bf3 + k, bf4 + l) = buf[0][ijkl];
+                  ++ijkl;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return result;
+}
+
+TEST_CASE("DFBS-Generator", "[dfbs-generator]") {
+#if defined(LIBINT2_SUPPORT_ERI) && defined(LIBINT2_SUPPORT_ERI3) && \
+    defined(LIBINT2_SUPPORT_ERI2)
+  // Will use Neon as a test case
+  libint2::Atom atom;
+  atom.atomic_number = 2;
+  atom.x = 0.0;
+  atom.y = 0.0;
+  atom.z = 0.0;
+
+  // Using STO-3G basis set for Neon
+  libint2::BasisSet bs("STO-3G", {atom});
+  long nao = bs.nbf();
+
+  // Use automatic DF basis set generator with tight cholesky threshold
+  libint2::DFBasisSetGenerator dfbs_generator(bs.shells(), 1e-4);
+  const auto dfbs = dfbs_generator.reduced_basis();
+  long ndf = dfbs.nbf();
+
+  // Compute 2-center integrals
+  const auto eri2 = compute_eri2(dfbs);
+
+  // Compute 3-center integrals
+  const auto eri3 = compute_eri3(bs, dfbs);
+
+  // Compute 4-center integrals
+  const auto eri4 = compute_eri4(bs);
+
+  // Compute L_inv
+  Eigen::LLT<Eigen::MatrixXd> V_LLt(eri2);
+  Eigen::MatrixXd I = Eigen::MatrixXd::Identity(ndf, ndf);
+  auto L = V_LLt.matrixL();
+  Eigen::MatrixXd Linv_t = L.solve(I).transpose();
+  auto eri2_inv = Linv_t * Linv_t.transpose();
+
+  // reconstruct 4-center integrals and compare with original
+  double norm = 0.0;
+  Eigen::Tensor<double, 4> eri4_df(nao, nao, nao, nao);
+  for (long i = 0; i < nao; i++) {
+    for (long j = 0; j < nao; j++) {
+      for (long k = 0; k < nao; k++) {
+        for (long l = 0; l < nao; l++) {
+          for (long X = 0; X < ndf; X++) {
+            for (long Y = 0; Y < ndf; Y++) {
+              eri4_df(i, j, k, l) +=
+                  eri3(X, i, j) * eri2_inv(X, Y) * eri3(Y, k, l);
+            }
+          }
+          norm += std::pow(eri4_df(i, j, k, l) - eri4(i, j, k, l), 2);
+        }
+      }
+    }
+  }
+  REQUIRE_NOTHROW(norm < 1e-10);
+#endif  // LIBINT2_SUPPORT_ERI && LIBINT2_SUPPORT_ERI3 && LIBINT2_SUPPORT_ERI2
+}
+//