bug w/o cutensor, CCSD unit test (pyscf#98)

* example for CCSD, bug in cutensor * reduced memory footprint * remove water
bytedance · Feb 2, 2024 · 71f731c · 71f731c
1 parent e281d9b
commit 71f731c
Show file tree

Hide file tree

Showing 4 changed files with 67 additions and 18 deletions.
diff --git a/examples/18-ccsd.py b/examples/18-ccsd.py
@@ -0,0 +1,27 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import numpy as np
+import pyscf
+from gpu4pyscf.cc import ccsd_incore
+
+mol = pyscf.M(
+    atom = 'Vitamin_C.xyz',
+    basis = 'cc-pvdz',
+    verbose=5)
+
+mf = mol.RHF().run()
+mf.with_df = None
+e_tot = ccsd_incore.CCSD(mf).kernel()
diff --git a/gpu4pyscf/cc/ccsd_incore.py b/gpu4pyscf/cc/ccsd_incore.py
@@ -182,7 +182,8 @@ def _direct_ovvv_vvvv(mycc, t1, t2):
     orbv = cupy.asarray(mo[:,nocc:])
     t1po = orbv.dot(cupy.asarray(t1).T)
     tau = make_tau_tril(t1, t2)
-    x2 = _einsum('xab,pa,qb->xpq', tau, orbv, orbv)
+    x2 = _einsum('xab,pa->xpb', tau, orbv)
+    x2 = _einsum('xpb,qb->xpq', x2, orbv)
     tau = None
 
     nao, nmo = mo.shape
@@ -221,12 +222,14 @@ def contract_vvvv_(eri, i0, i1, j0, j1):
     if vhfopt.uniq_l_ctr[:,0].max() <= gpu_hf.LMAX_ON_GPU:
         # Computing ERIs on GPU
         idx, idy = cupy.tril_indices(nao)
-        eribuf = cupy.empty(blksize**2*nao**2)
+        #eribuf = cupy.empty(blksize**2*nao**2)
         def fint(ish0, ish1, jsh0, jsh1, group_id):
             i0, i1 = ao_loc[ish0], ao_loc[ish1]
             j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
-            eri = cupy.ndarray((i1-i0, nao, j1-j0, nao), memptr=eribuf.data)
-            eri.fill(0.)
+            #eri = cupy.ndarray((i1-i0, nao, j1-j0, nao), memptr=eribuf.data)
+            #eri.fill(0.)
+            eri = cupy.zeros([i1-i0,nao,j1-j0,nao])
+
             # strides to ensure data order consistent with eri(k1-k0,nao,l1-l0,nao)
             strides = [1, (j1-j0)*nao, (j1-j0)*nao**2, nao]
             ao_offsets = [0, 0, i0, j0]
@@ -312,23 +315,33 @@ def fint(ish0, ish1, jsh0, jsh1, group_id):
     t1new = t1tmp.dot(orbv).get()
 
     # vvvv-t2 contractions back to MO repr.
-    Ht2tril = _einsum('xpq,pa,qb->xab', Ht2ao, orbv, orbv)
+    Ht2tril = _einsum('xpq,pa->xaq', Ht2ao, orbv)
+    Ht2tril = _einsum('xaq,qb->xab', Ht2tril, orbv)
 
     # part of ovvv-t2 contractions back to MO repr.
     #: tmp = np.einsum('ijcd,ka,kdcb->ijba', tau, t1, eris.ovvv)
     #: t2new -= tmp + tmp.transpose(1,0,3,2)
     t1pv = orbo.dot(cupy.asarray(t1))
-    Ht2tril -= _einsum('xpq,pa,qb->xab', Ht2ao, orbv, t1pv)
-    Ht2tril -= _einsum('xpq,pa,qb->xab', Ht2ao, t1pv, orbv)
+    tmp = _einsum('xpq,pa->xaq', Ht2ao, orbv)
+    Ht2tril -= _einsum('xaq,qb->xab', tmp, t1pv)
+
+    tmp = _einsum('xpq,pa->xaq', Ht2ao, t1pv)
+    Ht2tril -= _einsum('xaq,qb->xab', tmp, orbv)#_einsum('xpq,pa,qb->xab', Ht2ao, t1pv, orbv)
+
     t2new = _unpack_t2_tril(Ht2tril, nocc, nvir).get()
     Ht2ao = Ht2full = None
 
     c = vhfopt.coeff.get()
     wpq = 2 * lib.einsum('pqkk,pi,qj->ij', wVVoo, c, c)
     wpq -= lib.einsum('pqkk,pi,qj->ji', wVvoO, c, c)
 
-    wVOov = _einsum('pqji,qb,pa->bjia', cupy.asarray(wVvoO), orbv, orbv).get()
-    wVooV = _einsum('pqji,pa,qb->bjia', cupy.asarray(wVVoo),-orbv, orbv).get()
+    tmp = _einsum('pqji,qb->pbji', cupy.asarray(wVvoO), orbv)
+    wVOov = _einsum('pbji,pa->bjia', tmp, orbv).get()
+    #wVOov = _einsum('pqji,qb,pa->bjia', cupy.asarray(wVvoO), orbv, orbv).get()
+
+    tmp = _einsum('pqji,pa->aqji', cupy.asarray(wVVoo), -orbv)
+    wVooV = _einsum('aqji,qb->bjia', tmp, orbv).get()
+    #wVooV = _einsum('pqji,pa,qb->bjia', cupy.asarray(wVVoo),-orbv, orbv).get()
     wVVoo = None
 
     if FREE_CUPY_CACHE:
@@ -521,7 +534,7 @@ class CCSD(ccsd.CCSD):
     def __init__(self, mf, *args, **kwargs):
         if hasattr(mf, 'to_cpu'):
             mf = mf.to_cpu()
-        if mf.with_df:
+        if hasattr(mf, 'with_df') and mf.with_df:
             lib.logger.warn(mf.mol, 'DF-CCSD not available. Run the standard CCSD.')
         ccsd.CCSD.__init__(self, mf, *args, **kwargs)
 

diff --git a/gpu4pyscf/cc/tests/test_ccsd.py b/gpu4pyscf/cc/tests/test_ccsd.py
@@ -1,6 +1,8 @@
 import unittest
 import numpy as np
 import pyscf
+import pytest
+from packaging import version
 from gpu4pyscf.cc import ccsd_incore
 
 def setUpModule():
@@ -42,6 +44,7 @@ def test_ccsd_incore_update_amps(self):
         self.assertAlmostEqual(abs(r1 - t1).max(), 0, 9)
         self.assertAlmostEqual(abs(r2 - t2).max(), 0, 9)
 
+    @pytest.mark.skipif(version.parse(pyscf.__version__) <= version.parse('2.4.0'), reason='requires pyscf 2.5 or higher')
     def test_ccsd_incore_kernel(self):
         ref = mf.CCSD().run()
         mcc = ccsd_incore.CCSD(mf.to_gpu()).run()

diff --git a/gpu4pyscf/lib/cutensor.py b/gpu4pyscf/lib/cutensor.py
@@ -21,11 +21,17 @@
     import cupy_backends.cuda.libs.cutensor  # NOQA
     from cupyx import cutensor
     from cupy_backends.cuda.libs import cutensor as cutensor_backend
-    CUTENSOR_ALGO_DEFAULT = cutensor_backend.ALGO_DEFAULT
+    ALGO_DEFAULT = cutensor_backend.ALGO_DEFAULT
+    OP_IDENTITY = cutensor_backend.OP_IDENTITY
+    JIT_MODE_NONE = cutensor_backend.JIT_MODE_NONE
+    WORKSPACE_RECOMMENDED = cutensor_backend.WORKSPACE_RECOMMENDED
     _tensor_descriptors = {}
 except ImportError:
     cutensor = None
-    CUTENSOR_ALGO_DEFAULT = None
+    ALGO_DEFAULT = None
+    OP_IDENTITY = None
+    JIT_MODE_NONE = None
+    WORKSPACE_RECOMMENDED = None
 
 def _auto_create_mode(array, mode):
     if not isinstance(mode, cutensor.Mode):
@@ -53,13 +59,13 @@ def _create_tensor_descriptor(a):
 def contraction(
     pattern, a, b, alpha, beta,
     out=None,
-    op_a=cutensor_backend.OP_IDENTITY,
-    op_b=cutensor_backend.OP_IDENTITY,
-    op_c=cutensor_backend.OP_IDENTITY,
-    algo=cutensor_backend.ALGO_DEFAULT,
-    jit_mode=cutensor_backend.JIT_MODE_NONE,
+    op_a=OP_IDENTITY,
+    op_b=OP_IDENTITY,
+    op_c=OP_IDENTITY,
+    algo=ALGO_DEFAULT,
+    jit_mode=JIT_MODE_NONE,
     compute_desc=0,
-    ws_pref=cutensor_backend.WORKSPACE_RECOMMENDED
+    ws_pref=WORKSPACE_RECOMMENDED
 ):
 
     pattern = pattern.replace(" ", "")