From 7f1103bd6e0bcdb822d7ce011b0b7466da27140b Mon Sep 17 00:00:00 2001
From: Xiaojie Wu <wxj6000@gmail.com>
Date: Fri, 9 Feb 2024 14:35:55 -0800
Subject: [PATCH 1/3] conv_tol_cpscf = 1e-3 by default (#103)

* conv_tol_cpscf = 1e-3 by default

* fix misspelling of e_disp

* fixed unit tests
---
 examples/00-h2o.py                    | 1 +
 examples/dft_driver.py                | 1 +
 gpu4pyscf/df/tests/test_df_hessian.py | 9 +++++++++
 gpu4pyscf/hessian/rhf.py              | 3 ++-
 gpu4pyscf/lib/tests/test_to_gpu.py    | 7 +++++--
 gpu4pyscf/scf/hf.py                   | 3 ++-
 gpu4pyscf/scf/uhf.py                  | 4 ++++
 setup.py                              | 2 +-
 8 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/examples/00-h2o.py b/examples/00-h2o.py
index 9a8eda27..4fab7e94 100644
--- a/examples/00-h2o.py
+++ b/examples/00-h2o.py
@@ -44,6 +44,7 @@
 mf_GPU.conv_tol = scf_tol
 mf_GPU.max_cycle = max_scf_cycles
 mf_GPU.screen_tol = screen_tol
+mf_GPU.conv_tol_cpscf = 1e-3
 
 # Compute Energy
 e_dft = mf_GPU.kernel()
diff --git a/examples/dft_driver.py b/examples/dft_driver.py
index d7479292..f8502140 100644
--- a/examples/dft_driver.py
+++ b/examples/dft_driver.py
@@ -53,6 +53,7 @@
 mf_df.direct_scf = 1e-14
 mf_df.conv_tol = 1e-10
 mf_df.chkfile = None
+mf_df.conv_tol_cpscf = 1e-3
 e_tot = mf_df.kernel()
 scf_time = time.time() - start_time
 print(f'compute time for energy: {scf_time:.3f} s')
diff --git a/gpu4pyscf/df/tests/test_df_hessian.py b/gpu4pyscf/df/tests/test_df_hessian.py
index 08ac27af..3cf63cc0 100644
--- a/gpu4pyscf/df/tests/test_df_hessian.py
+++ b/gpu4pyscf/df/tests/test_df_hessian.py
@@ -114,6 +114,7 @@ class KnownValues(unittest.TestCase):
     def test_hessian_rhf(self):
         print('-----testing DF RHF Hessian----')
         mf = _make_rhf(mol_sph)
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian()
         hobj.set(auxbasis_response=2)
         h = hobj.kernel()
@@ -123,6 +124,7 @@ def test_hessian_rhf(self):
     def test_hessian_lda(self):
         print('-----testing DF LDA Hessian----')
         mf = _make_rks(mol_sph, 'LDA')
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian()
         hobj.set(auxbasis_response=2)
         h = hobj.kernel()
@@ -132,6 +134,7 @@ def test_hessian_lda(self):
     def test_hessian_gga(self):
         print('-----testing DF PBE Hessian----')
         mf = _make_rks(mol_sph, 'PBE')
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian()
         hobj.set(auxbasis_response=2)
         h = hobj.kernel()
@@ -141,6 +144,7 @@ def test_hessian_gga(self):
     def test_hessian_hybrid(self):
         print('-----testing DF B3LYP Hessian----')
         mf = _make_rks(mol_sph, 'b3lyp')
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian()
         hobj.set(auxbasis_response=2)
         h = hobj.kernel()
@@ -150,6 +154,7 @@ def test_hessian_hybrid(self):
     def test_hessian_mgga(self):
         print('-----testing DF M06 Hessian----')
         mf = _make_rks(mol_sph, 'm06')
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian()
         hobj.set(auxbasis_response=2)
         h = hobj.kernel()
@@ -159,6 +164,7 @@ def test_hessian_mgga(self):
     def test_hessian_rsh(self):
         print('-----testing DF wb97 Hessian----')
         mf = _make_rks(mol_sph, 'wb97')
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian()
         hobj.set(auxbasis_response=2)
         h = hobj.kernel()
@@ -171,6 +177,7 @@ def test_hessian_D3(self):
 
         mf = dft.rks.RKS(pmol, xc='B3LYP', disp='d3bj').density_fit(auxbasis=auxbasis0)
         mf.conv_tol = 1e-12
+        mf.conv_tol_cpscf = 1e-7
         mf.grids.level = grids_level
         mf.verbose = 1
         mf.kernel()
@@ -186,6 +193,7 @@ def test_hessian_D4(self):
 
         mf = dft.rks.RKS(pmol, xc='B3LYP', disp='d4').density_fit(auxbasis=auxbasis0)
         mf.conv_tol = 1e-12
+        mf.conv_tol_cpscf = 1e-7
         mf.grids.level = grids_level
         mf.verbose = 1
         mf.kernel()
@@ -198,6 +206,7 @@ def test_hessian_D4(self):
     def test_hessian_cart(self):
         print('-----testing DF Hessian (cartesian)----')
         mf = _make_rks(mol_cart, 'b3lyp')
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian()
         hobj.set(auxbasis_response=2)
         h = hobj.kernel()
diff --git a/gpu4pyscf/hessian/rhf.py b/gpu4pyscf/hessian/rhf.py
index 5efb8f12..9b0e7a15 100644
--- a/gpu4pyscf/hessian/rhf.py
+++ b/gpu4pyscf/hessian/rhf.py
@@ -348,7 +348,8 @@ def _ao2mo(mat):
 
         h1vo = cupy.vstack(h1vo)
         s1vo = cupy.vstack(s1vo)
-        mo1, e1 = cphf.solve(fx, mo_energy, mo_occ, h1vo, s1vo, verbose=verbose)
+        tol = mf.conv_tol_cpscf * (ia1 - ia0)
+        mo1, e1 = cphf.solve(fx, mo_energy, mo_occ, h1vo, s1vo, tol=tol, verbose=verbose)
         # Different from PySCF, mo1 is in AO
         mo1 = mo1.reshape(-1,3,nao,nocc)
         e1 = e1.reshape(-1,3,nocc,nocc)
diff --git a/gpu4pyscf/lib/tests/test_to_gpu.py b/gpu4pyscf/lib/tests/test_to_gpu.py
index 587b3102..7e485b89 100644
--- a/gpu4pyscf/lib/tests/test_to_gpu.py
+++ b/gpu4pyscf/lib/tests/test_to_gpu.py
@@ -88,9 +88,10 @@ def test_df_RHF(self):
         assert numpy.abs(lib.fp(g) - -0.01641213202225146) < 1e-7
 
         mf = scf.RHF(mol).density_fit().run()
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian().to_gpu()
         h = hobj.kernel()
-        assert numpy.abs(lib.fp(h) - 2.198079352288524) < 1e-7
+        assert numpy.abs(lib.fp(h) - 2.198079352288524) < 1e-4
 
     @pytest.mark.skipif(pyscf_24, reason='requires pyscf 2.5 or higher')
     def test_df_b3lyp(self):
@@ -104,9 +105,10 @@ def test_df_b3lyp(self):
         assert numpy.abs(lib.fp(g) - -0.04079190644707999) < 1e-7
 
         mf = rks.RKS(mol, xc='b3lyp').density_fit().run()
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian().to_gpu()
         h = hobj.kernel()
-        assert numpy.abs(lib.fp(h) - 2.1527804103141848) < 1e-7
+        assert numpy.abs(lib.fp(h) - 2.1527804103141848) < 1e-4
 
     @pytest.mark.skipif(pyscf_24, reason='requires pyscf 2.5 or higher')
     def test_df_RKS(self):
@@ -121,6 +123,7 @@ def test_df_RKS(self):
         assert numpy.abs(lib.fp(g) - -0.034343799164131) < 1e-5
 
         mf = rks.RKS(mol, xc='wb97x').density_fit().run()
+        mf.conv_tol_cpscf = 1e-7
         hobj = mf.Hessian().to_gpu()
         h = hobj.kernel()
         assert numpy.abs(lib.fp(h) - 2.187025544697092) < 1e-4
diff --git a/gpu4pyscf/scf/hf.py b/gpu4pyscf/scf/hf.py
index 0367b0c7..98eb7193 100644
--- a/gpu4pyscf/scf/hf.py
+++ b/gpu4pyscf/scf/hf.py
@@ -563,9 +563,10 @@ def _quad_moment(mf, mol=None, dm=None, unit='Debye-Ang'):
 class RHF(hf.RHF):
     from gpu4pyscf.lib.utils import to_cpu, to_gpu, device
 
-    _keys = {'e_disp', 'h1e', 's1e', 'e_mf', 'screen_tol'}
+    _keys = {'e_disp', 'h1e', 's1e', 'e_mf', 'screen_tol', 'conv_tol_cpscf'}
 
     screen_tol = 1e-14
+    conv_tol_cpscf = 1e-3
     DIIS = diis.SCF_DIIS
     get_jk = _get_jk
     _eigh = staticmethod(eigh)
diff --git a/gpu4pyscf/scf/uhf.py b/gpu4pyscf/scf/uhf.py
index 61e5906f..744186bb 100644
--- a/gpu4pyscf/scf/uhf.py
+++ b/gpu4pyscf/scf/uhf.py
@@ -109,6 +109,10 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None,
 class UHF(uhf.UHF):
     from gpu4pyscf.lib.utils import to_cpu, to_gpu, device
 
+    _keys = {'e_disp', 'screen_tol', 'conv_tol_cpscf'}
+
+    screen_tol = 1e-14
+    conv_tol_cpscf = 1e-3
     DIIS = diis.SCF_DIIS
     get_jk = _get_jk
     _eigh = staticmethod(hf.eigh)
diff --git a/setup.py b/setup.py
index 86cfe5d9..7c5be369 100755
--- a/setup.py
+++ b/setup.py
@@ -124,7 +124,7 @@ def initialize_with_default_plat_name(self):
     ],
     cmdclass={'build_py': CMakeBuildPy},
     install_requires=[
-        'pyscf>=2.4.0',
+        'pyscf>=2.5.0',
         f'cupy-cuda{CUDA_VERSION}>=12.3',
         'geometric',
         f'gpu4pyscf-libxc-cuda{CUDA_VERSION}',

From 94fc70c5b0b6e3f11ca2bdaa45cf09bcf4294cbc Mon Sep 17 00:00:00 2001
From: Xiaojie Wu <wxj6000@gmail.com>
Date: Fri, 9 Feb 2024 14:40:42 -0800
Subject: [PATCH 2/3] cupy13 in setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7c5be369..19267725 100755
--- a/setup.py
+++ b/setup.py
@@ -125,7 +125,7 @@ def initialize_with_default_plat_name(self):
     cmdclass={'build_py': CMakeBuildPy},
     install_requires=[
         'pyscf>=2.5.0',
-        f'cupy-cuda{CUDA_VERSION}>=12.3',
+        f'cupy-cuda{CUDA_VERSION}>=13.0',
         'geometric',
         f'gpu4pyscf-libxc-cuda{CUDA_VERSION}',
     ]

From 55b7d7bba26899091c4a90b4c103179ff200ef22 Mon Sep 17 00:00:00 2001
From: Xiaojie Wu <wxj6000@gmail.com>
Date: Tue, 13 Feb 2024 20:56:53 -0800
Subject: [PATCH 3/3] correct cutensor installation in README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9b0ba8e3..6a30cbbf 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ export PYTHONPATH="${PYTHONPATH}:/your-local-path/gpu4pyscf"
 ```
 Then install cutensor for acceleration
 ```sh
-python -m cupyx.tools.install_library --cuda 11.x --library cutensor
+pip3 install cutensor-cu11
 ```
 
 Features