Skip to content

Commit

Permalink
Finish write the gradient part.
Browse files Browse the repository at this point in the history
  • Loading branch information
puzhichen committed Feb 22, 2024
1 parent c89de15 commit 899ae55
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 40 deletions.
26 changes: 19 additions & 7 deletions gpu4pyscf/df/grad/uks.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,11 @@ def get_veff(ks_grad, mol=None, dm=None):
mem_now = lib.current_memory()[0]
max_memory = max(2000, ks_grad.max_memory*.9-mem_now)
if ks_grad.grid_response:
exc, vxc = uks_grad.get_vxc_full_response(ni, mol, grids, mf.xc, dm,
exc, vxc_tmp = uks_grad.get_vxc_full_response(ni, mol, grids, mf.xc, dm,
max_memory=max_memory,
verbose=ks_grad.verbose)
if mf.nlc or ni.libxc.is_nlc(mf.xc):
raise NotImplementedError
logger.debug1(ks_grad, 'sum(grids response) %s', exc.sum(axis=0))
else:
exc, vxc_tmp = uks_grad.get_vxc(ni, mol, grids, mf.xc, dm,
max_memory=max_memory, verbose=ks_grad.verbose)
Expand All @@ -71,10 +70,23 @@ def get_veff(ks_grad, mol=None, dm=None):
xc = mf.xc
else:
xc = mf.nlc
enlc, vnlc = rks_grad.get_nlc_vxc(
ni, mol, nlcgrids, xc, dm[0]+dm[1],
# dma = dm[0]
# dma = tag_array(dma, mo_coeff=mf.mo_coeff[0], mo_occ=mf.mo_occ[0])
# dmb = dm[1]
# dmb = tag_array(dmb, mo_coeff=mf.mo_coeff[1], mo_occ=mf.mo_occ[1])
# enlc, vnlc = rks_grad.get_nlc_vxc(
# ni, mol, nlcgrids, xc, dma,
# max_memory=max_memory, verbose=ks_grad.verbose)
# vxc_tmp[0] += vnlc
# enlc, vnlc = rks_grad.get_nlc_vxc(
# ni, mol, nlcgrids, xc, dmb,
# max_memory=max_memory, verbose=ks_grad.verbose)
# vxc_tmp[1] += vnlc
enlc, vnlc = uks_grad.get_nlc_vxc(
ni, mol, nlcgrids, xc, dm, mf.mo_coeff, mf.mo_occ,
max_memory=max_memory, verbose=ks_grad.verbose)
vxc_tmp += vnlc
vxc_tmp[0] += vnlc
vxc_tmp[1] += vnlc
t0 = logger.timer(ks_grad, 'vxc', *t0)

mo_coeff_alpha = mf.mo_coeff[0]
Expand Down Expand Up @@ -121,9 +133,9 @@ def get_veff(ks_grad, mol=None, dm=None):
if omega != 0:
vk_lr0, vkaux_lr0 = ks_grad.get_k(mol, dm[0], mo_coeff=ks_grad.base.mo_coeff[0], mo_occ=ks_grad.base.mo_occ[0], omega=omega)
vk_lr1, vkaux_lr1 = ks_grad.get_k(mol, dm[1], mo_coeff=ks_grad.base.mo_coeff[1], mo_occ=ks_grad.base.mo_occ[1], omega=omega)
vk += (vk_lr0-vk_lr1) * (alpha-hyb)
vk += (vk_lr0 + vk_lr1) * (alpha-hyb)
if ks_grad.auxbasis_response:
vk_aux += (vkaux_lr0-vkaux_lr1) * (alpha-hyb)
vk_aux += (vkaux_lr0 + vkaux_lr1) * (alpha-hyb)

vxc += vj - vk
if ks_grad.auxbasis_response:
Expand Down
27 changes: 14 additions & 13 deletions gpu4pyscf/df/tests/test_df_uks_grad.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import numpy as np
import unittest
from pyscf import lib
from gpu4pyscf.dft import rks
from gpu4pyscf.dft import uks

lib.num_threads(8)

Expand All @@ -43,7 +43,7 @@
nlcgrids_level = 3
def setUpModule():
global mol
mol = pyscf.M(atom=atom, basis=bas0, max_memory=32000)
mol = pyscf.M(atom=atom, basis=bas0, max_memory=32000, charge=1, spin=1)
mol.output = '/dev/null'
mol.verbose = 1
mol.build()
Expand All @@ -55,8 +55,8 @@ def tearDownModule():
mol.stdout.close()
del mol

def _check_grad(grid_response=False, xc=xc0, disp=disp0, tol=1e-6):
mf = rks.RKS(mol, xc=xc, disp=disp).density_fit(auxbasis=auxbasis0)
def _check_grad(grid_response=True, xc=xc0, disp=disp0, tol=1e-6):
mf = uks.UKS(mol, xc=xc, disp=disp).density_fit(auxbasis=auxbasis0)
mf.grids.level = grids_level
mf.nlcgrids.level = nlcgrids_level
mf.conv_tol = 1e-10
Expand Down Expand Up @@ -109,28 +109,29 @@ def test_grad_without_grids_response(self):

def test_grad_lda(self):
print("-----LDA testing-------")
_check_grad(xc='LDA', disp=None, tol=1e-6)
_check_grad(xc='LDA', disp=None, tol=1e-5)

def test_grad_gga(self):
print('-----GGA testing-------')
_check_grad(xc='PBE', disp=None, tol=1e-6)
_check_grad(xc='PBE', disp=None, tol=1e-5)

def test_grad_hybrid(self):
print('------hybrid GGA testing--------')
_check_grad(xc='B3LYP', disp=None, tol=1e-6)
_check_grad(xc='B3LYP', disp=None, tol=1e-5)

def test_grad_mgga(self):
print('-------mGGA testing-------------')
_check_grad(xc='m06', disp=None, tol=1e-4)
_check_grad(xc='m06', disp=None, tol=1e-3)

def test_grad_rsh(self):
print('--------RSH testing-------------')
_check_grad(xc='wb97', disp=None, tol=1e-4)
_check_grad(xc='wb97', disp=None, tol=1e-3)

def test_grad_nlc(self):
print('--------nlc testing-------------')
_check_grad(xc='HYB_MGGA_XC_WB97M_V', disp=None, tol=1e-6)
# Not implemented!
# def test_grad_nlc(self):
# print('--------nlc testing-------------')
# _check_grad(xc='HYB_MGGA_XC_WB97M_V', disp=None, tol=1e-6)

if __name__ == "__main__":
print("Full Tests for DF Gradient")
print("Full Tests for UKS DF Gradient")
unittest.main()
95 changes: 79 additions & 16 deletions gpu4pyscf/grad/uks.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pyscf import lib
from gpu4pyscf.grad import uhf as uhf_grad
from gpu4pyscf.grad import rks as rks_grad
from gpu4pyscf.dft import numint
from gpu4pyscf.dft import numint, xc_deriv
from gpu4pyscf.lib.cupy_helper import contract, get_avail_mem, add_sparse, load_library, take_last2d, tag_array
from gpu4pyscf.lib import logger
from pyscf import __config__
Expand Down Expand Up @@ -85,17 +85,22 @@ def get_veff(ks_grad, mol=None, dm=None):
xc = mf.xc
else:
xc = mf.nlc
dma = dm[0]
dma = tag_array(dma, mo_coeff=mf.mo_coeff[0], mo_occ=mf.mo_occ[0])
dmb = dm[1]
dmb = tag_array(dmb, mo_coeff=mf.mo_coeff[1], mo_occ=mf.mo_occ[1])
enlc, vnlc = rks_grad.get_nlc_vxc(
ni, mol, nlcgrids, xc, dma,
# dma = dm[0]
# dma = tag_array(dma, mo_coeff=mf.mo_coeff[0], mo_occ=mf.mo_occ[0])
# dmb = dm[1]
# dmb = tag_array(dmb, mo_coeff=mf.mo_coeff[1], mo_occ=mf.mo_occ[1])
# enlc, vnlc = rks_grad.get_nlc_vxc(
# ni, mol, nlcgrids, xc, dma,
# max_memory=max_memory, verbose=ks_grad.verbose)
# vxc_tmp[0] += vnlc
# enlc, vnlc = rks_grad.get_nlc_vxc(
# ni, mol, nlcgrids, xc, dmb,
# max_memory=max_memory, verbose=ks_grad.verbose)
# vxc_tmp[1] += vnlc
enlc, vnlc = get_nlc_vxc(
ni, mol, nlcgrids, xc, dm, mf.mo_coeff, mf.mo_occ,
max_memory=max_memory, verbose=ks_grad.verbose)
vxc_tmp[0] += vnlc
enlc, vnlc = rks_grad.get_nlc_vxc(
ni, mol, nlcgrids, xc, dmb,
max_memory=max_memory, verbose=ks_grad.verbose)
vxc_tmp[1] += vnlc
t0 = logger.timer(ks_grad, 'vxc', *t0)

Expand Down Expand Up @@ -226,9 +231,9 @@ def get_vxc_full_response(ni, mol, grids, xc_code, dms, relativity=0, hermi=1,
coeff = cupy.asarray(opt.coeff)
nao, nao0 = coeff.shape
dms = cupy.asarray(dms)
dms = take_last2d(dms, opt.ao_idx)
# dms = [cupy.einsum('pi,ij,qj->pq', coeff, dm, coeff)
# for dm in dms.reshape(-1,nao0,nao0)]
# dms = take_last2d(dms, opt.ao_idx)
dms = [cupy.einsum('pi,ij,qj->pq', coeff, dm, coeff)
for dm in dms.reshape(-1,nao0,nao0)]
# mo_coeff = cupy.einsum('pq,sqt->spt',coeff,mo_coeff)
# mo_coeff = coeff @ mo_coeff

Expand All @@ -243,7 +248,7 @@ def get_vxc_full_response(ni, mol, grids, xc_code, dms, relativity=0, hermi=1,
mem_avail = get_avail_mem()
comp = (ao_deriv+1)*(ao_deriv+2)*(ao_deriv+3)//6
block_size = int((mem_avail*.4/8/(comp+1)/nao - 3*nao*2)/ ALIGNED) * ALIGNED
block_size = min(block_size//2, MIN_BLK_SIZE)
block_size = min(block_size, MIN_BLK_SIZE)
log.debug1('Available GPU mem %f Mb, block_size %d', mem_avail/1e6, block_size)

if block_size < ALIGNED:
Expand Down Expand Up @@ -299,13 +304,71 @@ def get_vxc_full_response(ni, mol, grids, xc_code, dms, relativity=0, hermi=1,
vmat[1] += vtmp

excsum = None
vmat = take_last2d(vmat, opt.rev_ao_idx)
# vmat = cupy.einsum('pi,npq,qj->nij', coeff, vmat, coeff)
# vmat = take_last2d(vmat, opt.rev_ao_idx)
vmat = cupy.einsum('pi,snpq,qj->snij', coeff, vmat, coeff)

# - sign because nabla_X = -nabla_x
return excsum, -vmat


def get_nlc_vxc(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ, relativity=0, hermi=1,
max_memory=2000, verbose=None):
xctype = ni._xc_type(xc_code)
opt = getattr(ni, 'gdftopt', None)
if opt is None:
ni.build(mol, grids.coords)
opt = ni.gdftopt

mo_occ = cupy.asarray(mo_occ)
mo_coeff = cupy.asarray(mo_coeff)

mol = opt.mol
coeff = cupy.asarray(opt.coeff)
nao, nao0 = coeff.shape
mo_coeff_0 = coeff @ mo_coeff[0]
mo_coeff_1 = coeff @ mo_coeff[1]
nset = 1
assert nset == 1

nlc_coefs = ni.nlc_coeff(xc_code)
if len(nlc_coefs) != 1:
raise NotImplementedError('Additive NLC')
nlc_pars, fac = nlc_coefs[0]

ao_deriv = 2
vvrho = []
for ao_mask, mask, weight, coords \
in ni.block_loop(mol, grids, nao, ao_deriv, max_memory=max_memory):
mo_coeff_mask_0 = mo_coeff_0[mask]
mo_coeff_mask_1 = mo_coeff_1[mask]
rhoa = numint.eval_rho2(mol, ao_mask[:4], mo_coeff_mask_0, mo_occ[0], None, xctype, with_lapl=False)
rhob = numint.eval_rho2(mol, ao_mask[:4], mo_coeff_mask_1, mo_occ[1], None, xctype, with_lapl=False)
vvrho.append(rhoa + rhob)
rho = cupy.hstack(vvrho)

vxc = numint._vv10nlc(rho, grids.coords, rho, grids.weights,
grids.coords, nlc_pars)[1]
vv_vxc = xc_deriv.transform_vxc(rho, vxc, 'GGA', spin=0)

vmat = cupy.zeros((3,nao,nao))
p1 = 0
for ao_mask, mask, weight, coords \
in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
p0, p1 = p1, p1 + weight.size
wv = vv_vxc[:,p0:p1] * weight
wv[0] *= .5 # *.5 because vmat + vmat.T at the end
vmat_tmp = rks_grad._gga_grad_sum_(ao_mask, wv)
add_sparse(vmat, vmat_tmp, mask)

#vmat = contract('npq,qj->npj', vmat, coeff)
#vmat = contract('pi,npj->nij', coeff, vmat)
rev_ao_idx = opt.rev_ao_idx
vmat = take_last2d(vmat, rev_ao_idx)
exc = None
# - sign because nabla_X = -nabla_x
return exc, -vmat


class Gradients(uhf_grad.Gradients, pyscf.grad.uks.Gradients):
from gpu4pyscf.lib.utils import to_cpu, to_gpu, device

Expand Down
9 changes: 5 additions & 4 deletions gpu4pyscf/scf/hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,10 +383,11 @@ def _kernel(mf, conv_tol=1e-10, conv_tol_grad=None,

dm = cupy.asarray(dm0, order='C')
if hasattr(dm0, 'mo_coeff') and hasattr(dm0, 'mo_occ'):
mo_coeff = cupy.asarray(dm0.mo_coeff)
mo_occ = cupy.asarray(dm0.mo_occ)
occ_coeff = cupy.asarray(mo_coeff[:,mo_occ>0])
dm = tag_array(dm, occ_coeff=occ_coeff, mo_occ=mo_occ, mo_coeff=mo_coeff)
if dm0.ndim == 2:
mo_coeff = cupy.asarray(dm0.mo_coeff)
mo_occ = cupy.asarray(dm0.mo_occ)
occ_coeff = cupy.asarray(mo_coeff[:,mo_occ>0])
dm = tag_array(dm, occ_coeff=occ_coeff, mo_occ=mo_occ, mo_coeff=mo_coeff)

# use optimized workflow if possible
if hasattr(mf, 'init_workflow'):
Expand Down

0 comments on commit 899ae55

Please sign in to comment.