diff --git a/README.md b/README.md
index f396f73e..ec2b380a 100644
--- a/README.md
+++ b/README.md
@@ -50,8 +50,7 @@ Features
- Dispersion corrections via [DFTD3](https://github.com/dftd3/simple-dftd3) and [DFTD4](https://github.com/dftd4/dftd4);
- Nonlocal functional correction (vv10) for SCF and gradient;
- ECP is supported and calculated on CPU;
-- PCM solvent models, analytical gradients, and semi-analytical Hessian matrix;
-- SMD solvent models and solvation free energy
+- PCM models, SMD model, their analytical gradients, and semi-analytical Hessian matrix;
Limitations
--------
diff --git a/benchmarks/smd/benchmark_smd.py b/benchmarks/smd/benchmark_smd.py
new file mode 100644
index 00000000..2b15baaa
--- /dev/null
+++ b/benchmarks/smd/benchmark_smd.py
@@ -0,0 +1,108 @@
+# Copyright 2024 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import os
+import numpy
+from pyscf import gto
+from gpu4pyscf.solvent import smd
+from gpu4pyscf.solvent.grad import smd as smd_grad
+
+'''
+Benchmark CDS, gradient, hessian in the SMD model
+'''
+
+path = '../molecules/organic/'
+
+# calculated with qchem 6.1, in kcal/mol
+e_cds_qchem = {}
+e_cds_qchem['water'] = {
+ '020_Vitamin_C.xyz': 5.0737,
+ '031_Inosine.xyz': 2.7129,
+ '033_Bisphenol_A.xyz': 6.2620,
+ '037_Mg_Porphin.xyz': 6.0393,
+ '042_Penicillin_V.xyz': 6.4349,
+ '045_Ochratoxin_A.xyz': 8.8526,
+ '052_Cetirizine.xyz': 4.6430,
+ '057_Tamoxifen.xyz': 5.4743,
+ '066_Raffinose.xyz': 10.2543,
+ '084_Sphingomyelin.xyz': 15.0308,
+ '095_Azadirachtin.xyz': 16.9321,
+ '113_Taxol.xyz': 17.2585,
+ '168_Valinomycin.xyz': 27.3499,
+}
+
+e_cds_qchem['ethanol'] = {
+ '020_Vitamin_C.xyz': 4.2119,
+ '031_Inosine.xyz': 1.0175,
+ '033_Bisphenol_A.xyz': -0.2454,
+ '037_Mg_Porphin.xyz': -2.2391,
+ '042_Penicillin_V.xyz': 1.8338,
+ '045_Ochratoxin_A.xyz': 1.0592,
+ '052_Cetirizine.xyz': -2.5099,
+ '057_Tamoxifen.xyz': -3.9320,
+ '066_Raffinose.xyz': 3.1120,
+ '084_Sphingomyelin.xyz': -3.1963,
+ '095_Azadirachtin.xyz': 6.5286,
+ '113_Taxol.xyz': 2.7271,
+ '168_Valinomycin.xyz': 4.0013,
+}
+
+def _check_energy_grad(filename, solvent='water'):
+ xyz = os.path.join(path, filename)
+ mol = gto.Mole(atom=xyz)
+ mol.build()
+ natm = mol.natm
+ fd_cds = numpy.zeros([natm,3])
+ eps = 1e-4
+ for ia in range(mol.natm):
+ for j in range(3):
+ coords = mol.atom_coords(unit='B')
+ coords[ia,j] += eps
+ mol.set_geom_(coords, unit='B')
+ mol.build()
+
+ smdobj = smd.SMD(mol)
+ smdobj.solvent = solvent
+ e0_cds = smdobj.get_cds()
+
+ coords[ia,j] -= 2.0*eps
+ mol.set_geom_(coords, unit='B')
+ mol.build()
+
+ smdobj = smd.SMD(mol)
+ smdobj.solvent = solvent
+ e1_cds = smdobj.get_cds()
+
+ coords[ia,j] += eps
+ mol.set_geom_(coords, unit='B')
+ fd_cds[ia,j] = (e0_cds - e1_cds) / (2.0 * eps)
+
+ smdobj = smd.SMD(mol)
+ smdobj.solvent = solvent
+ e_cds = smd.get_cds(smdobj) * smd.hartree2kcal
+ grad_cds = smd_grad.get_cds(smdobj)
+ print(f'e_cds by GPU4PySCF: {e_cds}')
+ print(f'e_cds by Q-Chem: {e_cds_qchem[solvent][filename]}')
+ print(f'e_cds(Q-Chem) - e_cds(GPU4PySCF): {e_cds - e_cds_qchem[solvent][filename]}')
+ print(f'norm (fd gradient - analy gradient: {numpy.linalg.norm(fd_cds - grad_cds.get())}')
+ assert numpy.linalg.norm(fd_cds - grad_cds.get()) < 1e-8
+
+if __name__ == "__main__":
+ for filename in os.listdir(path):
+ print(f'---- benchmarking {filename} ----------')
+ print('in water')
+ _check_energy_grad(filename, solvent='water')
+ print('in ethanol')
+ _check_energy_grad(filename, solvent='ethanol')
\ No newline at end of file
diff --git a/examples/14-pcm_solvent.py b/examples/14-pcm_solvent.py
index 8eaabbf8..7fc040c7 100644
--- a/examples/14-pcm_solvent.py
+++ b/examples/14-pcm_solvent.py
@@ -33,7 +33,7 @@
mf.grids.atom_grid = (99,590)
mf.small_rho_cutoff = 1e-10
mf.with_solvent.lebedev_order = 29 # 302 Lebedev grids
-mf.with_solvent.method = 'C-PCM'
+mf.with_solvent.method = 'IEF-PCM'
mf.with_solvent.eps = 78.3553
mf.kernel()
diff --git a/examples/16-smd.py b/examples/16-smd.py
index 32c5802c..10e8ef63 100644
--- a/examples/16-smd.py
+++ b/examples/16-smd.py
@@ -24,11 +24,12 @@
H -0.7570000000 -0.0000000000 -0.4696000000
H 0.7570000000 0.0000000000 -0.4696000000
'''
-mol = pyscf.M(atom=atom, basis='def2-tzvpp', verbose=4)
+atom = 'Vitamin_C.xyz'
+mol = pyscf.M(atom=atom, basis='def2-tzvpp', verbose=6)
-mf = dft.rks.RKS(mol, xc='HYB_GGA_XC_B3LYP')#.density_fit()
+mf = dft.rks.RKS(mol, xc='HYB_GGA_XC_B3LYP').density_fit()
mf = mf.SMD()
-mf.verbose = 4
+mf.verbose = 6
mf.grids.atom_grid = (99,590)
mf.small_rho_cutoff = 1e-10
mf.with_solvent.lebedev_order = 29 # 302 Lebedev grids
@@ -36,3 +37,9 @@
mf.with_solvent.solvent = 'water'
e_tot = mf.kernel()
print('total energy with SMD:', e_tot)
+
+gradobj = mf.nuc_grad_method()
+f = gradobj.kernel()
+
+hessobj = mf.Hessian()
+h = hessobj.kernel()
\ No newline at end of file
diff --git a/gpu4pyscf/__init__.py b/gpu4pyscf/__init__.py
index ea5e78a2..98dd2059 100644
--- a/gpu4pyscf/__init__.py
+++ b/gpu4pyscf/__init__.py
@@ -5,3 +5,8 @@
# monkey patch libxc reference due to a bug in nvcc
from pyscf.dft import libxc
libxc.__reference__ = 'unable to decode the reference due to https://github.com/NVIDIA/cuda-python/issues/29'
+
+from gpu4pyscf.lib.utils import patch_cpu_kernel
+from gpu4pyscf.lib.cupy_helper import tag_array
+from pyscf import lib
+lib.tag_array = tag_array
diff --git a/gpu4pyscf/lib/cupy_helper.py b/gpu4pyscf/lib/cupy_helper.py
index 42eea65c..332486ff 100644
--- a/gpu4pyscf/lib/cupy_helper.py
+++ b/gpu4pyscf/lib/cupy_helper.py
@@ -17,9 +17,11 @@
import os
import sys
+import functools
import numpy as np
import cupy
import ctypes
+from pyscf import lib
from gpu4pyscf.lib import logger
from gpu4pyscf.gto import mole
from gpu4pyscf.lib.cutensor import contract
@@ -93,11 +95,22 @@ def device2host_2d(a_cpu, a_gpu, stream=None):
class CPArrayWithTag(cupy.ndarray):
pass
+@functools.wraps(lib.tag_array)
def tag_array(a, **kwargs):
- ''' attach attributes to cupy ndarray'''
- t = cupy.asarray(a).view(CPArrayWithTag)
- if isinstance(a, CPArrayWithTag):
- t.__dict__.update(a.__dict__)
+ '''
+ a should be cupy/numpy array or tuple of cupy/numpy array
+
+ attach attributes to cupy ndarray for cupy array
+ attach attributes to numpy ndarray for numpy array
+ '''
+ if isinstance(a, cupy.ndarray) or isinstance(a[0], cupy.ndarray):
+ t = cupy.asarray(a).view(CPArrayWithTag)
+ if isinstance(a, CPArrayWithTag):
+ t.__dict__.update(a.__dict__)
+ else:
+ t = np.asarray(a).view(lib.NPArrayWithTag)
+ if isinstance(a, lib.NPArrayWithTag):
+ t.__dict__.update(a.__dict__)
t.__dict__.update(kwargs)
return t
@@ -168,19 +181,23 @@ def add_sparse(a, b, indices):
raise RuntimeError('failed in sparse_add2d')
return a
-def dist_matrix(coords, out=None):
- assert coords.flags.c_contiguous
- n = coords.shape[0]
+def dist_matrix(x, y, out=None):
+ assert x.flags.c_contiguous
+ assert y.flags.c_contiguous
+
+ m = x.shape[0]
+ n = y.shape[0]
if out is None:
- out = cupy.empty([n,n])
+ out = cupy.empty([m,n])
stream = cupy.cuda.get_current_stream()
err = libcupy_helper.dist_matrix(
ctypes.cast(stream.ptr, ctypes.c_void_p),
ctypes.cast(out.data.ptr, ctypes.c_void_p),
- ctypes.cast(coords.data.ptr, ctypes.c_void_p),
- ctypes.cast(coords.data.ptr, ctypes.c_void_p),
- ctypes.c_int(n),
+ ctypes.cast(x.data.ptr, ctypes.c_void_p),
+ ctypes.cast(y.data.ptr, ctypes.c_void_p),
+ ctypes.c_int(m),
+ ctypes.c_int(n)
)
if err != 0:
raise RuntimeError('failed in calculating distance matrix')
@@ -376,10 +393,8 @@ def cart2sph(t, axis=0, ang=1, out=None):
t_cart = t.reshape([i0*nli, li_size[0], i3])
if(out is not None):
out = out.reshape([i0*nli, li_size[1], i3])
- out[:] = cupy.einsum('min,ip->mpn', t_cart, c2s)
- else:
- out = cupy.einsum('min,ip->mpn', t_cart, c2s)
- return out.reshape(out_shape)
+ t_sph = contract('min,ip->mpn', t_cart, c2s, out=out)
+ return t_sph.reshape(out_shape)
# a copy with modification from
# https://github.com/pyscf/pyscf/blob/9219058ac0a1bcdd8058166cad0fb9127b82e9bf/pyscf/lib/linalg_helper.py#L1536
diff --git a/gpu4pyscf/lib/cupy_helper/dist_matrix.cu b/gpu4pyscf/lib/cupy_helper/dist_matrix.cu
index 77f78de7..2b88db4f 100644
--- a/gpu4pyscf/lib/cupy_helper/dist_matrix.cu
+++ b/gpu4pyscf/lib/cupy_helper/dist_matrix.cu
@@ -19,11 +19,11 @@
#define THREADS 32
__global__
-static void _calc_distances(double *dist, const double *x, const double *y, int n)
+static void _calc_distances(double *dist, const double *x, const double *y, int m, int n)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
- if (i >= n || j >= n){
+ if (i >= m || j >= n){
return;
}
@@ -34,12 +34,13 @@ static void _calc_distances(double *dist, const double *x, const double *y, int
}
extern "C" {
-int dist_matrix(cudaStream_t stream, double *dist, const double *x, const double *y, int n)
+int dist_matrix(cudaStream_t stream, double *dist, const double *x, const double *y, int m, int n)
{
- int ntile = (n + THREADS - 1) / THREADS;
+ int ntilex = (m + THREADS - 1) / THREADS;
+ int ntiley = (n + THREADS - 1) / THREADS;
dim3 threads(THREADS, THREADS);
- dim3 blocks(ntile, ntile);
- _calc_distances<<>>(dist, x, y, n);
+ dim3 blocks(ntilex, ntiley);
+ _calc_distances<<>>(dist, x, y, m, n);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
return 1;
diff --git a/gpu4pyscf/lib/gvhf/g3c2e.cuh b/gpu4pyscf/lib/gvhf/g3c2e.cuh
index 9195cddd..4a06f1f7 100644
--- a/gpu4pyscf/lib/gvhf/g3c2e.cuh
+++ b/gpu4pyscf/lib/gvhf/g3c2e.cuh
@@ -673,331 +673,4 @@ static void write_int3c2e_ip2_jk(JKMatrix jk, double *j3, double* k3, int ksh){
}
}
-__global__
-static void GINTrun_int3c2e_ip1_jk_kernel1000(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets)
-{
- int ntasks_ij = offsets.ntasks_ij;
- int ntasks_kl = offsets.ntasks_kl;
- int task_ij = blockIdx.x * blockDim.x + threadIdx.x;
- int task_kl = blockIdx.y * blockDim.y + threadIdx.y;
- bool active = true;
- if (task_ij >= ntasks_ij || task_kl >= ntasks_kl) {
- active = false;
- task_ij = 0;
- task_kl = 0;
- }
- int bas_ij = offsets.bas_ij + task_ij;
- int bas_kl = offsets.bas_kl + task_kl;
- double norm = envs.fac;
- double omega = envs.omega;
- int *bas_pair2bra = c_bpcache.bas_pair2bra;
- int *bas_pair2ket = c_bpcache.bas_pair2ket;
- int ish = bas_pair2bra[bas_ij];
- int jsh = bas_pair2ket[bas_ij];
- int ksh = bas_pair2bra[bas_kl];
- int nprim_ij = envs.nprim_ij;
- int nprim_kl = envs.nprim_kl;
- int prim_ij = offsets.primitive_ij + task_ij * nprim_ij;
- int prim_kl = offsets.primitive_kl + task_kl * nprim_kl;
- double* __restrict__ a12 = c_bpcache.a12;
- double* __restrict__ e12 = c_bpcache.e12;
- double* __restrict__ x12 = c_bpcache.x12;
- double* __restrict__ y12 = c_bpcache.y12;
- double* __restrict__ z12 = c_bpcache.z12;
- double* __restrict__ a1 = c_bpcache.a1;
- int ij, kl;
- int prim_ij0, prim_ij1, prim_kl0, prim_kl1;
- int nbas = c_bpcache.nbas;
- double* __restrict__ bas_x = c_bpcache.bas_coords;
- double* __restrict__ bas_y = bas_x + nbas;
- double* __restrict__ bas_z = bas_y + nbas;
-
- double gout0 = 0;
- double gout1 = 0;
- double gout2 = 0;
- double xi = bas_x[ish];
- double yi = bas_y[ish];
- double zi = bas_z[ish];
- prim_ij0 = prim_ij;
- prim_ij1 = prim_ij + nprim_ij;
- prim_kl0 = prim_kl;
- prim_kl1 = prim_kl + nprim_kl;
- for (ij = prim_ij0; ij < prim_ij1; ++ij) {
- for (kl = prim_kl0; kl < prim_kl1; ++kl) {
- double ai2 = -2.0*a1[ij];
- double aij = a12[ij];
- double eij = e12[ij];
- double xij = x12[ij];
- double yij = y12[ij];
- double zij = z12[ij];
- double akl = a12[kl];
- double ekl = e12[kl];
- double xkl = x12[kl];
- double ykl = y12[kl];
- double zkl = z12[kl];
- double xijxkl = xij - xkl;
- double yijykl = yij - ykl;
- double zijzkl = zij - zkl;
- double aijkl = aij + akl;
- double a1 = aij * akl;
- double a0 = a1 / aijkl;
- double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
- a0 *= theta;
- double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl);
- double fac = eij * ekl * sqrt(a0 / (a1 * a1 * a1));
- double root0, weight0;
- if (x < 3.e-7) {
- root0 = 0.5;
- weight0 = 1.;
- } else {
- double tt = sqrt(x);
- double fmt0 = SQRTPIE4 / tt * erf(tt);
- weight0 = fmt0;
- double e = exp(-x);
- double b = .5 / x;
- double fmt1 = b * (fmt0 - e);
- root0 = fmt1 / (fmt0 - fmt1);
- }
- root0 /= root0 + 1 - root0 * theta;
- double u2 = a0 * root0;
- double tmp2 = akl * u2 / (u2 * aijkl + a1);;
- double c00x = xij - xi - tmp2 * xijxkl;
- double c00y = yij - yi - tmp2 * yijykl;
- double c00z = zij - zi - tmp2 * zijzkl;
- double g_0 = 1;
- double g_1 = c00x;
- double g_2 = 1;
- double g_3 = c00y;
- double g_4 = norm * fac * weight0;
- double g_5 = g_4 * c00z;
-
- double f_1 = ai2 * g_1;
- double f_3 = ai2 * g_3;
- double f_5 = ai2 * g_5;
-
- gout0 += f_1 * g_2 * g_4;
- gout1 += g_0 * f_3 * g_4;
- gout2 += g_0 * g_2 * f_5;
- } }
- int *ao_loc = c_bpcache.ao_loc;
- int i0 = ao_loc[ish] - jk.ao_offsets_i;
- int j0 = ao_loc[jsh] - jk.ao_offsets_j;
- int k0 = ao_loc[ksh] - jk.ao_offsets_k;
-
- int nao = jk.nao;
- double* __restrict__ dm = jk.dm;
- double* __restrict__ rhok = jk.rhok;
- double* __restrict__ rhoj = jk.rhoj;
- double* __restrict__ vj = jk.vj;
- double* __restrict__ vk = jk.vk;
-
- int tx = threadIdx.x;
- int ty = threadIdx.y;
- __shared__ double sdata[THREADSX][THREADSY];
- if (!active){
- gout0 = 0.0; gout1 = 0.0; gout2 = 0.0;
- }
- if (vj != NULL){
- double rhoj_tmp;
- int off_dm = i0 + nao*j0;
- rhoj_tmp = dm[off_dm] * rhoj[k0];
- double vj_tmp[3];
- vj_tmp[0] = gout0*rhoj_tmp;
- vj_tmp[1] = gout1*rhoj_tmp;
- vj_tmp[2] = gout2*rhoj_tmp;
- for (int j = 0; j < 3; j++){
- sdata[tx][ty] = vj_tmp[j]; __syncthreads();
- if(ty<8) sdata[tx][ty] += sdata[tx][ty+8]; __syncthreads();
- if(ty<4) sdata[tx][ty] += sdata[tx][ty+4]; __syncthreads();
- if(ty<2) sdata[tx][ty] += sdata[tx][ty+2]; __syncthreads();
- if(ty<1) sdata[tx][ty] += sdata[tx][ty+1]; __syncthreads();
- if (ty == 0) atomicAdd(vj+i0+j*nao, sdata[tx][0]);
- }
- }
- if (vk != NULL){
- double rhok_tmp;
- int off_rhok = i0 + nao*j0 + k0*nao*nao;
- rhok_tmp = rhok[off_rhok];
- double vk_tmp[3];
- vk_tmp[0] = gout0 * rhok_tmp;
- vk_tmp[1] = gout1 * rhok_tmp;
- vk_tmp[2] = gout2 * rhok_tmp;
- for (int j = 0; j < 3; j++){
- sdata[tx][ty] = vk_tmp[j]; __syncthreads();
- if(ty<8) sdata[tx][ty] += sdata[tx][ty+8]; __syncthreads();
- if(ty<4) sdata[tx][ty] += sdata[tx][ty+4]; __syncthreads();
- if(ty<2) sdata[tx][ty] += sdata[tx][ty+2]; __syncthreads();
- if(ty<1) sdata[tx][ty] += sdata[tx][ty+1]; __syncthreads();
- if (ty == 0) atomicAdd(vk+i0+j*nao, sdata[tx][0]);
- }
- }
-}
-
-
-__global__
-static void GINTrun_int3c2e_ip2_jk_kernel0010(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets)
-{
- int ntasks_ij = offsets.ntasks_ij;
- int ntasks_kl = offsets.ntasks_kl;
- int task_ij = blockIdx.x * blockDim.x + threadIdx.x;
- int task_kl = blockIdx.y * blockDim.y + threadIdx.y;
- bool active = true;
- if (task_ij >= ntasks_ij || task_kl >= ntasks_kl) {
- active = false;
- task_ij = 0;
- task_kl = 0;
- }
- int bas_ij = offsets.bas_ij + task_ij;
- int bas_kl = offsets.bas_kl + task_kl;
- double norm = envs.fac;
- double omega = envs.omega;
- int *bas_pair2bra = c_bpcache.bas_pair2bra;
- int *bas_pair2ket = c_bpcache.bas_pair2ket;
- int ish = bas_pair2bra[bas_ij];
- int jsh = bas_pair2ket[bas_ij];
- int ksh = bas_pair2bra[bas_kl];
- int nprim_ij = envs.nprim_ij;
- int nprim_kl = envs.nprim_kl;
- int prim_ij = offsets.primitive_ij + task_ij * nprim_ij;
- int prim_kl = offsets.primitive_kl + task_kl * nprim_kl;
- double* __restrict__ a12 = c_bpcache.a12;
- double* __restrict__ e12 = c_bpcache.e12;
- double* __restrict__ x12 = c_bpcache.x12;
- double* __restrict__ y12 = c_bpcache.y12;
- double* __restrict__ z12 = c_bpcache.z12;
- double* __restrict__ a1 = c_bpcache.a1;
- int ij, kl;
- int prim_ij0, prim_ij1, prim_kl0, prim_kl1;
- int nbas = c_bpcache.nbas;
- double* __restrict__ bas_x = c_bpcache.bas_coords;
- double* __restrict__ bas_y = bas_x + nbas;
- double* __restrict__ bas_z = bas_y + nbas;
-
- double gout0 = 0;
- double gout1 = 0;
- double gout2 = 0;
- double xk = bas_x[ksh];
- double yk = bas_y[ksh];
- double zk = bas_z[ksh];
- prim_ij0 = prim_ij;
- prim_ij1 = prim_ij + nprim_ij;
- prim_kl0 = prim_kl;
- prim_kl1 = prim_kl + nprim_kl;
- for (ij = prim_ij0; ij < prim_ij1; ++ij) {
- for (kl = prim_kl0; kl < prim_kl1; ++kl) {
- double ak2 = -2.0*a1[kl];
- double aij = a12[ij];
- double eij = e12[ij];
- double xij = x12[ij];
- double yij = y12[ij];
- double zij = z12[ij];
- double akl = a12[kl];
- double ekl = e12[kl];
- double xkl = x12[kl];
- double ykl = y12[kl];
- double zkl = z12[kl];
- double xijxkl = xij - xkl;
- double yijykl = yij - ykl;
- double zijzkl = zij - zkl;
- double aijkl = aij + akl;
- double a1 = aij * akl;
- double a0 = a1 / aijkl;
- double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
- a0 *= theta;
- double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl);
- double fac = norm * eij * ekl * sqrt(a0 / (a1 * a1 * a1));
- double root0, weight0;
- if (x < 3.e-7) {
- root0 = 0.5;
- weight0 = 1.;
- } else {
- double tt = sqrt(x);
- double fmt0 = SQRTPIE4 / tt * erf(tt);
- weight0 = fmt0;
- double e = exp(-x);
- double b = .5 / x;
- double fmt1 = b * (fmt0 - e);
- root0 = fmt1 / (fmt0 - fmt1);
- }
- root0 /= root0 + 1 - root0 * theta;
- double u2 = a0 * root0;
- double tmp4 = .5 / (u2 * aijkl + a1);
- double b00 = u2 * tmp4;
- double tmp1 = 2 * b00;
- double tmp3 = tmp1 * aij;
- double c0px = xkl - xk + tmp3 * xijxkl;
- double c0py = ykl - yk + tmp3 * yijykl;
- double c0pz = zkl - zk + tmp3 * zijzkl;
- double g_0 = 1;
- double g_1 = c0px;
- double g_2 = 1;
- double g_3 = c0py;
- double g_4 = weight0 * fac;
- double g_5 = c0pz * g_4;
-
- double f_1 = ak2 * g_1;
- double f_3 = ak2 * g_3;
- double f_5 = ak2 * g_5;
-
- gout0 += f_1 * g_2 * g_4;
- gout1 += g_0 * f_3 * g_4;
- gout2 += g_0 * g_2 * f_5;
-
- } }
-
- int *ao_loc = c_bpcache.ao_loc;
- int i0 = ao_loc[ish] - jk.ao_offsets_i;
- int j0 = ao_loc[jsh] - jk.ao_offsets_j;
- int k0 = ao_loc[ksh] - jk.ao_offsets_k;
-
- int nao = jk.nao;
- int naux = jk.naux;
- double* __restrict__ dm = jk.dm;
- double* __restrict__ rhok = jk.rhok;
- double* __restrict__ rhoj = jk.rhoj;
- double* __restrict__ vj = jk.vj;
- double* __restrict__ vk = jk.vk;
-
- int tx = threadIdx.x;
- int ty = threadIdx.y;
- __shared__ double sdata[THREADSX][THREADSY];
- if (!active){
- gout0 = 0.0; gout1 = 0.0; gout2 = 0.0;
- }
- if (vj != NULL){
- double rhoj_tmp;
- int off_dm = i0 + nao*j0;
- rhoj_tmp = dm[off_dm] * rhoj[k0];
- double vj_tmp[3];
- vj_tmp[0] = gout0 * rhoj_tmp;
- vj_tmp[1] = gout1 * rhoj_tmp;
- vj_tmp[2] = gout2 * rhoj_tmp;
- for (int j = 0; j < 3; j++){
- sdata[tx][ty] = vj_tmp[j]; __syncthreads();
- if(tx<8) sdata[tx][ty] += sdata[tx+8][ty]; __syncthreads();
- if(tx<4) sdata[tx][ty] += sdata[tx+4][ty]; __syncthreads();
- if(tx<2) sdata[tx][ty] += sdata[tx+2][ty]; __syncthreads();
- if(tx<1) sdata[tx][ty] += sdata[tx+1][ty]; __syncthreads();
- if (tx == 0) atomicAdd(vj+k0+j*naux, sdata[0][ty]);
- }
- }
-
- if (vk != NULL){
- double rhok_tmp;
- int off_rhok = i0 + nao*j0 + k0*nao*nao;
- rhok_tmp = rhok[off_rhok];
- double vk_tmp[3];
- vk_tmp[0] = gout0 * rhok_tmp;
- vk_tmp[1] = gout1 * rhok_tmp;
- vk_tmp[2] = gout2 * rhok_tmp;
- for (int j = 0; j < 3; j++){
- sdata[tx][ty] = vk_tmp[j]; __syncthreads();
- if(tx<8) sdata[tx][ty] += sdata[tx+8][ty]; __syncthreads();
- if(tx<4) sdata[tx][ty] += sdata[tx+4][ty]; __syncthreads();
- if(tx<2) sdata[tx][ty] += sdata[tx+2][ty]; __syncthreads();
- if(tx<1) sdata[tx][ty] += sdata[tx+1][ty]; __syncthreads();
- if (tx == 0) atomicAdd(vk+k0+j*naux, sdata[0][ty]);
- }
- }
-}
diff --git a/gpu4pyscf/lib/gvhf/g3c2e_ip1.cu b/gpu4pyscf/lib/gvhf/g3c2e_ip1.cu
index cd20ac66..3c0c88fc 100644
--- a/gpu4pyscf/lib/gvhf/g3c2e_ip1.cu
+++ b/gpu4pyscf/lib/gvhf/g3c2e_ip1.cu
@@ -13,7 +13,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-
+
template __global__
void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets)
@@ -47,7 +47,7 @@ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o
double uw[NROOTS*2];
double g[2*GSIZE];
double *f = g + GSIZE;
-
+
double* __restrict__ a12 = c_bpcache.a12;
double* __restrict__ x12 = c_bpcache.x12;
double* __restrict__ y12 = c_bpcache.y12;
@@ -69,7 +69,7 @@ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o
as_ksh = lsh;
as_lsh = ksh;
}
-
+
double j3[GPU_CART_MAX * 3];
double k3[GPU_CART_MAX * 3];
for (int k = 0; k < GPU_CART_MAX * 3; k++){
@@ -79,7 +79,7 @@ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o
if (active) {
for (ij = prim_ij; ij < prim_ij+nprim_ij; ++ij) {
for (kl = prim_kl; kl < prim_kl+nprim_kl; ++kl) {
-
+
double aij = a12[ij];
double xij = x12[ij];
double yij = y12[ij];
@@ -94,19 +94,180 @@ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o
double aijkl = aij + akl;
double a1 = aij * akl;
double a0 = a1 / aijkl;
- double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
+ double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
a0 *= theta;
double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl);
GINTrys_root(x, uw);
GINTscale_u(uw, theta);
GINTg0_2e_2d4d(envs, g, uw, norm, as_ish, as_jsh, as_ksh, as_lsh, ij, kl);
-
+
double ai2 = -2.0*exp[ij];
GINTnabla1i_2e(envs, f, g, ai2, envs.i_l, envs.j_l, envs.k_l);
GINTkernel_int3c2e_ip1_getjk_direct(envs, jk, j3, k3, f, g, ish, jsh, ksh);
- }
+ }
}
}
-
+
write_int3c2e_ip1_jk(jk, j3, k3, ish);
}
+
+__global__
+static void GINTrun_int3c2e_ip1_jk_kernel1000(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets)
+{
+ int ntasks_ij = offsets.ntasks_ij;
+ int ntasks_kl = offsets.ntasks_kl;
+ int task_ij = blockIdx.x * blockDim.x + threadIdx.x;
+ int task_kl = blockIdx.y * blockDim.y + threadIdx.y;
+ bool active = true;
+ if (task_ij >= ntasks_ij || task_kl >= ntasks_kl) {
+ active = false;
+ task_ij = 0;
+ task_kl = 0;
+ }
+ int bas_ij = offsets.bas_ij + task_ij;
+ int bas_kl = offsets.bas_kl + task_kl;
+ double norm = envs.fac;
+ double omega = envs.omega;
+ int *bas_pair2bra = c_bpcache.bas_pair2bra;
+ int *bas_pair2ket = c_bpcache.bas_pair2ket;
+ int ish = bas_pair2bra[bas_ij];
+ int jsh = bas_pair2ket[bas_ij];
+ int ksh = bas_pair2bra[bas_kl];
+ int nprim_ij = envs.nprim_ij;
+ int nprim_kl = envs.nprim_kl;
+ int prim_ij = offsets.primitive_ij + task_ij * nprim_ij;
+ int prim_kl = offsets.primitive_kl + task_kl * nprim_kl;
+ double* __restrict__ a12 = c_bpcache.a12;
+ double* __restrict__ e12 = c_bpcache.e12;
+ double* __restrict__ x12 = c_bpcache.x12;
+ double* __restrict__ y12 = c_bpcache.y12;
+ double* __restrict__ z12 = c_bpcache.z12;
+ double* __restrict__ a1 = c_bpcache.a1;
+ int ij, kl;
+ int prim_ij0, prim_ij1, prim_kl0, prim_kl1;
+ int nbas = c_bpcache.nbas;
+ double* __restrict__ bas_x = c_bpcache.bas_coords;
+ double* __restrict__ bas_y = bas_x + nbas;
+ double* __restrict__ bas_z = bas_y + nbas;
+
+ double gout0 = 0;
+ double gout1 = 0;
+ double gout2 = 0;
+ double xi = bas_x[ish];
+ double yi = bas_y[ish];
+ double zi = bas_z[ish];
+ prim_ij0 = prim_ij;
+ prim_ij1 = prim_ij + nprim_ij;
+ prim_kl0 = prim_kl;
+ prim_kl1 = prim_kl + nprim_kl;
+ for (ij = prim_ij0; ij < prim_ij1; ++ij) {
+ for (kl = prim_kl0; kl < prim_kl1; ++kl) {
+ double ai2 = -2.0*a1[ij];
+ double aij = a12[ij];
+ double eij = e12[ij];
+ double xij = x12[ij];
+ double yij = y12[ij];
+ double zij = z12[ij];
+ double akl = a12[kl];
+ double ekl = e12[kl];
+ double xkl = x12[kl];
+ double ykl = y12[kl];
+ double zkl = z12[kl];
+ double xijxkl = xij - xkl;
+ double yijykl = yij - ykl;
+ double zijzkl = zij - zkl;
+ double aijkl = aij + akl;
+ double a1 = aij * akl;
+ double a0 = a1 / aijkl;
+ double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
+ a0 *= theta;
+ double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl);
+ double fac = eij * ekl * sqrt(a0 / (a1 * a1 * a1));
+ double root0, weight0;
+ if (x < 3.e-7) {
+ root0 = 0.5;
+ weight0 = 1.;
+ } else {
+ double tt = sqrt(x);
+ double fmt0 = SQRTPIE4 / tt * erf(tt);
+ weight0 = fmt0;
+ double e = exp(-x);
+ double b = .5 / x;
+ double fmt1 = b * (fmt0 - e);
+ root0 = fmt1 / (fmt0 - fmt1);
+ }
+ root0 /= root0 + 1 - root0 * theta;
+ double u2 = a0 * root0;
+ double tmp2 = akl * u2 / (u2 * aijkl + a1);;
+ double c00x = xij - xi - tmp2 * xijxkl;
+ double c00y = yij - yi - tmp2 * yijykl;
+ double c00z = zij - zi - tmp2 * zijzkl;
+ double g_0 = 1;
+ double g_1 = c00x;
+ double g_2 = 1;
+ double g_3 = c00y;
+ double g_4 = norm * fac * weight0;
+ double g_5 = g_4 * c00z;
+
+ double f_1 = ai2 * g_1;
+ double f_3 = ai2 * g_3;
+ double f_5 = ai2 * g_5;
+
+ gout0 += f_1 * g_2 * g_4;
+ gout1 += g_0 * f_3 * g_4;
+ gout2 += g_0 * g_2 * f_5;
+ } }
+
+ int *ao_loc = c_bpcache.ao_loc;
+ int i0 = ao_loc[ish] - jk.ao_offsets_i;
+ int j0 = ao_loc[jsh] - jk.ao_offsets_j;
+ int k0 = ao_loc[ksh] - jk.ao_offsets_k;
+
+ int nao = jk.nao;
+ double* __restrict__ dm = jk.dm;
+ double* __restrict__ rhok = jk.rhok;
+ double* __restrict__ rhoj = jk.rhoj;
+ double* __restrict__ vj = jk.vj;
+ double* __restrict__ vk = jk.vk;
+
+ int tx = threadIdx.x;
+ int ty = threadIdx.y;
+ __shared__ double sdata[THREADSX][THREADSY];
+ if (!active){
+ gout0 = 0.0; gout1 = 0.0; gout2 = 0.0;
+ }
+ if (vj != NULL){
+ double rhoj_tmp;
+ int off_dm = i0 + nao*j0;
+ rhoj_tmp = dm[off_dm] * rhoj[k0];
+ double vj_tmp[3];
+ vj_tmp[0] = gout0*rhoj_tmp;
+ vj_tmp[1] = gout1*rhoj_tmp;
+ vj_tmp[2] = gout2*rhoj_tmp;
+ for (int j = 0; j < 3; j++){
+ sdata[tx][ty] = vj_tmp[j]; __syncthreads();
+ if(ty<8) sdata[tx][ty] += sdata[tx][ty+8]; __syncthreads();
+ if(ty<4) sdata[tx][ty] += sdata[tx][ty+4]; __syncthreads();
+ if(ty<2) sdata[tx][ty] += sdata[tx][ty+2]; __syncthreads();
+ if(ty<1) sdata[tx][ty] += sdata[tx][ty+1]; __syncthreads();
+ if (ty == 0) atomicAdd(vj+i0+j*nao, sdata[tx][0]);
+ }
+ }
+ if (vk != NULL){
+ double rhok_tmp;
+ int off_rhok = i0 + nao*j0 + k0*nao*nao;
+ rhok_tmp = rhok[off_rhok];
+ double vk_tmp[3];
+ vk_tmp[0] = gout0 * rhok_tmp;
+ vk_tmp[1] = gout1 * rhok_tmp;
+ vk_tmp[2] = gout2 * rhok_tmp;
+ for (int j = 0; j < 3; j++){
+ sdata[tx][ty] = vk_tmp[j]; __syncthreads();
+ if(ty<8) sdata[tx][ty] += sdata[tx][ty+8]; __syncthreads();
+ if(ty<4) sdata[tx][ty] += sdata[tx][ty+4]; __syncthreads();
+ if(ty<2) sdata[tx][ty] += sdata[tx][ty+2]; __syncthreads();
+ if(ty<1) sdata[tx][ty] += sdata[tx][ty+1]; __syncthreads();
+ if (ty == 0) atomicAdd(vk+i0+j*nao, sdata[tx][0]);
+ }
+ }
+}
diff --git a/gpu4pyscf/lib/gvhf/g3c2e_ip2.cu b/gpu4pyscf/lib/gvhf/g3c2e_ip2.cu
index e2a4cdbc..d83f3608 100644
--- a/gpu4pyscf/lib/gvhf/g3c2e_ip2.cu
+++ b/gpu4pyscf/lib/gvhf/g3c2e_ip2.cu
@@ -13,7 +13,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-
+
template __global__
void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets)
@@ -47,7 +47,7 @@ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o
double uw[NROOTS*2];
double g[2*GSIZE];
double *f = g + GSIZE;
-
+
double* __restrict__ a12 = c_bpcache.a12;
double* __restrict__ x12 = c_bpcache.x12;
double* __restrict__ y12 = c_bpcache.y12;
@@ -69,7 +69,7 @@ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o
as_ksh = lsh;
as_lsh = ksh;
}
-
+
double j3[GPU_CART_MAX * 3];
double k3[GPU_CART_MAX * 3];
for (int k = 0; k < GPU_CART_MAX * 3; k++){
@@ -79,7 +79,7 @@ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o
if (active) {
for (ij = prim_ij; ij < prim_ij+nprim_ij; ++ij) {
for (kl = prim_kl; kl < prim_kl+nprim_kl; ++kl) {
-
+
double aij = a12[ij];
double xij = x12[ij];
double yij = y12[ij];
@@ -94,19 +94,187 @@ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o
double aijkl = aij + akl;
double a1 = aij * akl;
double a0 = a1 / aijkl;
- double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
+ double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
a0 *= theta;
double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl);
GINTrys_root(x, uw);
GINTscale_u(uw, theta);
GINTg0_2e_2d4d(envs, g, uw, norm, as_ish, as_jsh, as_ksh, as_lsh, ij, kl);
-
+
double ak2 = -2.0*exp[kl];
GINTnabla1k_2e(envs, f, g, ak2, envs.i_l, envs.j_l, envs.k_l);
GINTkernel_int3c2e_ip2_getjk_direct(envs, jk, j3, k3, f, g, ish, jsh, ksh);
- }
+ }
}
}
-
+
write_int3c2e_ip2_jk(jk, j3, k3, ksh);
}
+
+
+__global__
+static void GINTrun_int3c2e_ip2_jk_kernel0010(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets)
+{
+ int ntasks_ij = offsets.ntasks_ij;
+ int ntasks_kl = offsets.ntasks_kl;
+ int task_ij = blockIdx.x * blockDim.x + threadIdx.x;
+ int task_kl = blockIdx.y * blockDim.y + threadIdx.y;
+ bool active = true;
+ if (task_ij >= ntasks_ij || task_kl >= ntasks_kl) {
+ active = false;
+ task_ij = 0;
+ task_kl = 0;
+ }
+ int bas_ij = offsets.bas_ij + task_ij;
+ int bas_kl = offsets.bas_kl + task_kl;
+ double norm = envs.fac;
+ double omega = envs.omega;
+ int *bas_pair2bra = c_bpcache.bas_pair2bra;
+ int *bas_pair2ket = c_bpcache.bas_pair2ket;
+ int ish = bas_pair2bra[bas_ij];
+ int jsh = bas_pair2ket[bas_ij];
+ int ksh = bas_pair2bra[bas_kl];
+ int nprim_ij = envs.nprim_ij;
+ int nprim_kl = envs.nprim_kl;
+ int prim_ij = offsets.primitive_ij + task_ij * nprim_ij;
+ int prim_kl = offsets.primitive_kl + task_kl * nprim_kl;
+ double* __restrict__ a12 = c_bpcache.a12;
+ double* __restrict__ e12 = c_bpcache.e12;
+ double* __restrict__ x12 = c_bpcache.x12;
+ double* __restrict__ y12 = c_bpcache.y12;
+ double* __restrict__ z12 = c_bpcache.z12;
+ double* __restrict__ a1 = c_bpcache.a1;
+ int ij, kl;
+ int prim_ij0, prim_ij1, prim_kl0, prim_kl1;
+ int nbas = c_bpcache.nbas;
+ double* __restrict__ bas_x = c_bpcache.bas_coords;
+ double* __restrict__ bas_y = bas_x + nbas;
+ double* __restrict__ bas_z = bas_y + nbas;
+
+ double gout0 = 0;
+ double gout1 = 0;
+ double gout2 = 0;
+ double xk = bas_x[ksh];
+ double yk = bas_y[ksh];
+ double zk = bas_z[ksh];
+ prim_ij0 = prim_ij;
+ prim_ij1 = prim_ij + nprim_ij;
+ prim_kl0 = prim_kl;
+ prim_kl1 = prim_kl + nprim_kl;
+ for (ij = prim_ij0; ij < prim_ij1; ++ij) {
+ for (kl = prim_kl0; kl < prim_kl1; ++kl) {
+ double ak2 = -2.0*a1[kl];
+ double aij = a12[ij];
+ double eij = e12[ij];
+ double xij = x12[ij];
+ double yij = y12[ij];
+ double zij = z12[ij];
+ double akl = a12[kl];
+ double ekl = e12[kl];
+ double xkl = x12[kl];
+ double ykl = y12[kl];
+ double zkl = z12[kl];
+ double xijxkl = xij - xkl;
+ double yijykl = yij - ykl;
+ double zijzkl = zij - zkl;
+ double aijkl = aij + akl;
+ double a1 = aij * akl;
+ double a0 = a1 / aijkl;
+ double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
+ a0 *= theta;
+ double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl);
+ double fac = norm * eij * ekl * sqrt(a0 / (a1 * a1 * a1));
+ double root0, weight0;
+ if (x < 3.e-7) {
+ root0 = 0.5;
+ weight0 = 1.;
+ } else {
+ double tt = sqrt(x);
+ double fmt0 = SQRTPIE4 / tt * erf(tt);
+ weight0 = fmt0;
+ double e = exp(-x);
+ double b = .5 / x;
+ double fmt1 = b * (fmt0 - e);
+ root0 = fmt1 / (fmt0 - fmt1);
+ }
+ root0 /= root0 + 1 - root0 * theta;
+ double u2 = a0 * root0;
+ double tmp4 = .5 / (u2 * aijkl + a1);
+ double b00 = u2 * tmp4;
+ double tmp1 = 2 * b00;
+ double tmp3 = tmp1 * aij;
+ double c0px = xkl - xk + tmp3 * xijxkl;
+ double c0py = ykl - yk + tmp3 * yijykl;
+ double c0pz = zkl - zk + tmp3 * zijzkl;
+ double g_0 = 1;
+ double g_1 = c0px;
+ double g_2 = 1;
+ double g_3 = c0py;
+ double g_4 = weight0 * fac;
+ double g_5 = c0pz * g_4;
+
+ double f_1 = ak2 * g_1;
+ double f_3 = ak2 * g_3;
+ double f_5 = ak2 * g_5;
+
+ gout0 += f_1 * g_2 * g_4;
+ gout1 += g_0 * f_3 * g_4;
+ gout2 += g_0 * g_2 * f_5;
+
+ } }
+
+ int *ao_loc = c_bpcache.ao_loc;
+ int i0 = ao_loc[ish] - jk.ao_offsets_i;
+ int j0 = ao_loc[jsh] - jk.ao_offsets_j;
+ int k0 = ao_loc[ksh] - jk.ao_offsets_k;
+
+ int nao = jk.nao;
+ int naux = jk.naux;
+ double* __restrict__ dm = jk.dm;
+ double* __restrict__ rhok = jk.rhok;
+ double* __restrict__ rhoj = jk.rhoj;
+ double* __restrict__ vj = jk.vj;
+ double* __restrict__ vk = jk.vk;
+
+ int tx = threadIdx.x;
+ int ty = threadIdx.y;
+ __shared__ double sdata[THREADSX][THREADSY];
+ if (!active){
+ gout0 = 0.0; gout1 = 0.0; gout2 = 0.0;
+ }
+ if (vj != NULL){
+ double rhoj_tmp;
+ int off_dm = i0 + nao*j0;
+ rhoj_tmp = dm[off_dm] * rhoj[k0];
+ double vj_tmp[3];
+ vj_tmp[0] = gout0 * rhoj_tmp;
+ vj_tmp[1] = gout1 * rhoj_tmp;
+ vj_tmp[2] = gout2 * rhoj_tmp;
+ for (int j = 0; j < 3; j++){
+ sdata[tx][ty] = vj_tmp[j]; __syncthreads();
+ if(tx<8) sdata[tx][ty] += sdata[tx+8][ty]; __syncthreads();
+ if(tx<4) sdata[tx][ty] += sdata[tx+4][ty]; __syncthreads();
+ if(tx<2) sdata[tx][ty] += sdata[tx+2][ty]; __syncthreads();
+ if(tx<1) sdata[tx][ty] += sdata[tx+1][ty]; __syncthreads();
+ if (tx == 0) atomicAdd(vj+k0+j*naux, sdata[0][ty]);
+ }
+ }
+
+ if (vk != NULL){
+ double rhok_tmp;
+ int off_rhok = i0 + nao*j0 + k0*nao*nao;
+ rhok_tmp = rhok[off_rhok];
+ double vk_tmp[3];
+ vk_tmp[0] = gout0 * rhok_tmp;
+ vk_tmp[1] = gout1 * rhok_tmp;
+ vk_tmp[2] = gout2 * rhok_tmp;
+ for (int j = 0; j < 3; j++){
+ sdata[tx][ty] = vk_tmp[j]; __syncthreads();
+ if(tx<8) sdata[tx][ty] += sdata[tx+8][ty]; __syncthreads();
+ if(tx<4) sdata[tx][ty] += sdata[tx+4][ty]; __syncthreads();
+ if(tx<2) sdata[tx][ty] += sdata[tx+2][ty]; __syncthreads();
+ if(tx<1) sdata[tx][ty] += sdata[tx+1][ty]; __syncthreads();
+ if (tx == 0) atomicAdd(vk+k0+j*naux, sdata[0][ty]);
+ }
+ }
+}
\ No newline at end of file
diff --git a/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip1.cu b/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip1.cu
index 9174520d..5b7faf14 100644
--- a/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip1.cu
+++ b/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip1.cu
@@ -68,17 +68,17 @@ static int GINTrun_tasks_int3c2e_ip1_jk(JKMatrix *jk, BasisProdOffsets *offsets,
extern "C" { __host__
int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache,
- double *vj, double *vk, double *dm, double *rhoj, double *rhok,
+ double *vj, double *vk, double *dm, double *rhoj, double *rhok,
int *ao_offsets, int nao, int naux, int n_dm,
int *bins_locs_ij, int ntasks_kl, int ncp_ij, int cp_kl_id, double omega)
{
ContractionProdType *cp_kl = bpcache->cptype + cp_kl_id;
-
+
int ng[4] = {1,0,0,0};
-
+
// move bpcache to constant memory
checkCudaErrors(cudaMemcpyToSymbol(c_bpcache, bpcache, sizeof(BasisProdCache)));
-
+
JKMatrix jk;
jk.n_dm = n_dm;
jk.nao = nao;
@@ -92,8 +92,7 @@ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache,
jk.ao_offsets_j = ao_offsets[1];
jk.ao_offsets_k = ao_offsets[2];
jk.ao_offsets_l = ao_offsets[3];
- BasisProdOffsets offsets;
-
+
int *bas_pairs_locs = bpcache->bas_pairs_locs;
int *primitive_pairs_locs = bpcache->primitive_pairs_locs;
@@ -101,9 +100,9 @@ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache,
for (int n = 0; n < MAX_STREAMS; n++){
checkCudaErrors(cudaStreamCreate(&streams[n]));
}
-
+
int *idx = (int *)malloc(sizeof(int) * TOT_NF * 3);
- int *l_locs = (int *)malloc(sizeof(int) * (GPU_LMAX + 2));
+ int *l_locs = (int *)malloc(sizeof(int) * (GPU_LMAX + 2));
GINTinit_index1d_xyz(idx, l_locs);
checkCudaErrors(cudaMemcpyToSymbol(c_idx, idx, sizeof(int) * TOT_NF*3));
checkCudaErrors(cudaMemcpyToSymbol(c_l_locs, l_locs, sizeof(int) * (GPU_LMAX + 2)));
@@ -122,7 +121,8 @@ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache,
int ntasks_ij = bins_locs_ij[cp_ij_id+1] - bins_locs_ij[cp_ij_id];
if (ntasks_ij <= 0) continue;
-
+
+ BasisProdOffsets offsets;
offsets.ntasks_ij = ntasks_ij;
offsets.ntasks_kl = ntasks_kl;
offsets.bas_ij = bas_pairs_locs[cp_ij_id];
@@ -140,7 +140,7 @@ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache,
checkCudaErrors(cudaStreamSynchronize(streams[n]));
checkCudaErrors(cudaStreamDestroy(streams[n]));
}
-
+
return 0;
}
diff --git a/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip2.cu b/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip2.cu
index 7adefe75..cfc45bfa 100644
--- a/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip2.cu
+++ b/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip2.cu
@@ -67,17 +67,17 @@ static int GINTrun_tasks_int3c2e_ip2_jk(JKMatrix *jk, BasisProdOffsets *offsets,
extern "C" { __host__
int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache,
- double *vj, double *vk, double *dm, double *rhoj, double *rhok,
+ double *vj, double *vk, double *dm, double *rhoj, double *rhok,
int *ao_offsets, int nao, int naux, int n_dm,
int *bins_locs_ij, int ntasks_kl, int ncp_ij, int cp_kl_id, double omega)
{
ContractionProdType *cp_kl = bpcache->cptype + cp_kl_id;
-
+
int ng[4] = {0,0,1,0};
-
+
// move bpcache to constant memory
checkCudaErrors(cudaMemcpyToSymbol(c_bpcache, bpcache, sizeof(BasisProdCache)));
-
+
JKMatrix jk;
jk.n_dm = n_dm;
jk.nao = nao;
@@ -91,18 +91,17 @@ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache,
jk.ao_offsets_j = ao_offsets[1];
jk.ao_offsets_k = ao_offsets[2];
jk.ao_offsets_l = ao_offsets[3];
- BasisProdOffsets offsets;
-
+
int *bas_pairs_locs = bpcache->bas_pairs_locs;
int *primitive_pairs_locs = bpcache->primitive_pairs_locs;
-
+
cudaStream_t streams[MAX_STREAMS];
for (int n = 0; n < MAX_STREAMS; n++){
checkCudaErrors(cudaStreamCreate(&streams[n]));
}
-
+
int *idx = (int *)malloc(sizeof(int) * TOT_NF * 3);
- int *l_locs = (int *)malloc(sizeof(int) * (GPU_LMAX + 2));
+ int *l_locs = (int *)malloc(sizeof(int) * (GPU_LMAX + 2));
GINTinit_index1d_xyz(idx, l_locs);
checkCudaErrors(cudaMemcpyToSymbol(c_idx, idx, sizeof(int) * TOT_NF*3));
checkCudaErrors(cudaMemcpyToSymbol(c_l_locs, l_locs, sizeof(int) * (GPU_LMAX + 2)));
@@ -111,7 +110,7 @@ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache,
for (int cp_ij_id = 0; cp_ij_id < ncp_ij; cp_ij_id++){
int n_stream = cp_ij_id % MAX_STREAMS;
-
+
GINTEnvVars envs;
ContractionProdType *cp_ij = bpcache->cptype + cp_ij_id;
GINTinit_EnvVars(&envs, cp_ij, cp_kl, ng);
@@ -123,6 +122,7 @@ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache,
int ntasks_ij = bins_locs_ij[cp_ij_id+1] - bins_locs_ij[cp_ij_id];
if (ntasks_ij <= 0) continue;
+ BasisProdOffsets offsets;
offsets.ntasks_ij = ntasks_ij;
offsets.ntasks_kl = ntasks_kl;
offsets.bas_ij = bas_pairs_locs[cp_ij_id];
@@ -131,7 +131,7 @@ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache,
offsets.primitive_kl = primitive_pairs_locs[cp_kl_id];
int err = GINTrun_tasks_int3c2e_ip2_jk(&jk, &offsets, &envs, streams[n_stream]);
-
+
if (err != 0) {
return err;
}
diff --git a/gpu4pyscf/lib/tests/test_cupy_helper.py b/gpu4pyscf/lib/tests/test_cupy_helper.py
index 2b085674..987c7165 100644
--- a/gpu4pyscf/lib/tests/test_cupy_helper.py
+++ b/gpu4pyscf/lib/tests/test_cupy_helper.py
@@ -72,8 +72,7 @@ def test_sparse(self):
def test_dist_matrix(self):
a = cupy.random.rand(4, 3)
rij = cupy.sum((a[:,None,:] - a[None,:,:])**2, axis=2)**0.5
-
- rij0 = dist_matrix(a)
+ rij0 = dist_matrix(a, a)
assert cupy.linalg.norm(rij - rij0) < 1e-10
def test_takebak(self):
diff --git a/gpu4pyscf/lib/tests/test_cutensor.py b/gpu4pyscf/lib/tests/test_cutensor.py
index 7cbd08d5..ca338331 100644
--- a/gpu4pyscf/lib/tests/test_cutensor.py
+++ b/gpu4pyscf/lib/tests/test_cutensor.py
@@ -16,7 +16,7 @@
import unittest
import numpy
import cupy
-from gpu4pyscf.lib.cupy_helper import *
+from gpu4pyscf.lib.cupy_helper import contract
class KnownValues(unittest.TestCase):
def test_contract(self):
@@ -49,7 +49,7 @@ def test_cache(self):
c = contract('ijkl,jl->ik', a, b)
c_einsum = cupy.einsum('ijkl,jl->ik', a, b)
assert cupy.linalg.norm(c - c_einsum) < 1e-10
-
+
if __name__ == "__main__":
print("Full tests for cutensor module")
unittest.main()
\ No newline at end of file
diff --git a/gpu4pyscf/solvent/grad/pcm.py b/gpu4pyscf/solvent/grad/pcm.py
index cf6cda8f..a2149c4d 100644
--- a/gpu4pyscf/solvent/grad/pcm.py
+++ b/gpu4pyscf/solvent/grad/pcm.py
@@ -24,9 +24,10 @@
from pyscf import lib
from pyscf import gto, df
from pyscf.grad import rhf as rhf_grad
+from pyscf.solvent import ddcosmo_grad
+
from gpu4pyscf.solvent.pcm import PI, switch_h
from gpu4pyscf.df import int3c2e
-
from gpu4pyscf.lib.cupy_helper import contract
from gpu4pyscf.lib import logger
@@ -193,7 +194,6 @@ def grad_qv(pcmobj, dm):
intopt.build(1e-14, diag_block_with_triu=True, aosym=False)
coeff = intopt.coeff
dm_cart = coeff @ dm @ coeff.T
- #dm_cart = cupy.einsum('pi,ij,qj->pq', coeff, dm, coeff)
dvj, _ = int3c2e.get_int3c2e_ip_jk(intopt, 0, 'ip1', q_sym, None, dm_cart)
dq, _ = int3c2e.get_int3c2e_ip_jk(intopt, 0, 'ip2', q_sym, None, dm_cart)
@@ -232,10 +232,10 @@ def grad_solver(pcmobj, dm):
dF, dA = get_dF_dA(pcmobj.surface)
- with_D = pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']
+ with_D = pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE', 'SMD']
dD, dS, dSii = get_dD_dS(pcmobj.surface, dF, with_D=with_D, with_S=True)
- if pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']:
+ if pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE', 'SMD']:
DA = D*A
epsilon = pcmobj.eps
@@ -253,7 +253,7 @@ def grad_solver(pcmobj, dm):
de -= cupy.asarray([cupy.sum(de_dS[p0:p1], axis=0) for p0,p1, in gridslice])
de -= 0.5*contract('i,xij->jx', vK_1*q, dSii) # 0.5*cupy.einsum('i,xij,i->jx', vK_1, dSii, q)
- elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']:
+ elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE', 'SMD']:
dD = dD.transpose([2,0,1])
dS = dS.transpose([2,0,1])
dSii = dSii.transpose([2,0,1])
@@ -315,41 +315,51 @@ def contract_ket(a, B, c):
return de.get()
def make_grad_object(grad_method):
- '''
- return solvent gradient object
- '''
- grad_method_class = grad_method.__class__
- class WithSolventGrad(grad_method_class):
- def __init__(self, grad_method):
- self.__dict__.update(grad_method.__dict__)
- self.de_solvent = None
- self.de_solute = None
- self._keys = self._keys.union(['de_solvent', 'de_solute'])
-
- def kernel(self, *args, dm=None, atmlst=None, **kwargs):
- dm = kwargs.pop('dm', None)
- if dm is None:
- dm = self.base.make_rdm1(ao_repr=True)
-
- self.de_solvent = grad_qv(self.base.with_solvent, dm)
- self.de_solvent+= grad_solver(self.base.with_solvent, dm)
- self.de_solvent+= grad_nuc(self.base.with_solvent, dm)
-
- self.de_solute = grad_method_class.kernel(self, *args, **kwargs)
- self.de = self.de_solute + self.de_solvent
-
- if self.verbose >= logger.NOTE:
- logger.note(self, '--------------- %s (+%s) gradients ---------------',
- self.base.__class__.__name__,
- self.base.with_solvent.__class__.__name__)
- rhf_grad._write(self, self.mol, self.de, self.atmlst)
- logger.note(self, '----------------------------------------------')
- return self.de
-
- def _finalize(self):
- # disable _finalize. It is called in grad_method.kernel method
- # where self.de was not yet initialized.
- pass
-
- return WithSolventGrad(grad_method)
-
+ '''For grad_method in vacuum, add nuclear gradients of solvent pcmobj'''
+ if grad_method.base.with_solvent.frozen:
+ raise RuntimeError('Frozen solvent model is not avialbe for energy gradients')
+
+ name = (grad_method.base.with_solvent.__class__.__name__
+ + grad_method.__class__.__name__)
+ return lib.set_class(WithSolventGrad(grad_method),
+ (WithSolventGrad, grad_method.__class__), name)
+
+class WithSolventGrad:
+ _keys = {'de_solvent', 'de_solute'}
+
+ def __init__(self, grad_method):
+ self.__dict__.update(grad_method.__dict__)
+ self.de_solvent = None
+ self.de_solute = None
+
+ def undo_solvent(self):
+ cls = self.__class__
+ name_mixin = self.base.with_solvent.__class__.__name__
+ obj = lib.view(self, lib.drop_class(cls, WithSolventGrad, name_mixin))
+ del obj.de_solvent
+ del obj.de_solute
+ return obj
+
+ def kernel(self, *args, dm=None, atmlst=None, **kwargs):
+ dm = kwargs.pop('dm', None)
+ if dm is None:
+ dm = self.base.make_rdm1(ao_repr=True)
+
+ self.de_solute = super().kernel(*args, **kwargs)
+ self.de_solvent = grad_qv(self.base.with_solvent, dm)
+ self.de_solvent+= grad_solver(self.base.with_solvent, dm)
+ self.de_solvent+= grad_nuc(self.base.with_solvent, dm)
+ self.de = self.de_solute + self.de_solvent
+
+ if self.verbose >= logger.NOTE:
+ logger.note(self, '--------------- %s (+%s) gradients ---------------',
+ self.base.__class__.__name__,
+ self.base.with_solvent.__class__.__name__)
+ rhf_grad._write(self, self.mol, self.de, self.atmlst)
+ logger.note(self, '----------------------------------------------')
+ return self.de
+
+ def _finalize(self):
+ # disable _finalize. It is called in grad_method.kernel method
+ # where self.de was not yet initialized.
+ pass
\ No newline at end of file
diff --git a/gpu4pyscf/solvent/grad/smd.py b/gpu4pyscf/solvent/grad/smd.py
new file mode 100644
index 00000000..60e25a22
--- /dev/null
+++ b/gpu4pyscf/solvent/grad/smd.py
@@ -0,0 +1,279 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+'''
+Gradient of PCM family solvent model
+'''
+# pylint: disable=C0103
+
+import numpy as np
+import cupy
+#from cupyx import scipy, jit
+from pyscf import lib
+from pyscf import gto, df
+from pyscf.grad import rhf as rhf_grad
+from pyscf.data import radii
+from pyscf.solvent import ddcosmo_grad
+
+from gpu4pyscf.solvent import pcm, smd
+from gpu4pyscf.solvent.grad import pcm as pcm_grad
+from gpu4pyscf.solvent.smd import (
+ sigma_water, sigma_n, sigma_alpha, sigma_beta, r_zz, swtich_function,
+ hartree2kcal)
+from gpu4pyscf.df import int3c2e
+from gpu4pyscf.lib.cupy_helper import contract
+from gpu4pyscf.lib import logger
+
+def grad_swtich_function(R, r, dr):
+ if R < r + dr:
+ return -np.exp(dr/(R-dr-r)) * dr / (R-dr-r)**2
+ else:
+ return 0.0
+
+def atomic_surface_tension(symbols, coords, n, alpha, beta, water=True):
+ '''
+ - list of atomic symbols
+ - atomic coordinates in Anstrong
+ - solvent descriptors: n, alpha, beta
+ '''
+
+ def get_bond_tension(bond):
+ if water:
+ return sigma_water.get(bond, 0.0)
+ t = 0.0
+ t += sigma_n.get(bond, 0.0) * n
+ t += sigma_alpha.get(bond, 0.0) * alpha
+ t += sigma_beta.get(bond, 0.0) * beta
+ return t
+
+ def get_atom_tension(sym_i):
+ if water:
+ return sigma_water.get(sym_i, 0.0)
+ t = 0.0
+ t += sigma_n.get(sym_i, 0.0) * n
+ t += sigma_alpha.get(sym_i, 0.0) * alpha
+ t += sigma_beta.get(sym_i, 0.0) * beta
+ return t
+ natm = coords.shape[0]
+ ri_rj = coords[:,None,:] - coords[None,:,:]
+ rij = np.sum(ri_rj**2, axis=2)**0.5
+ np.fill_diagonal(rij, 1)
+ drij = ri_rj / np.expand_dims(rij, axis=-1)
+ tensions = []
+ for i, sym_i in enumerate(symbols):
+ if sym_i not in ['H', 'C', 'N', 'O', 'F', 'Si', 'S', 'Cl', 'Br']:
+ tensions.append(np.zeros([natm,3]))
+ continue
+
+ tension = np.zeros([natm,3])
+ if sym_i in ['F', 'Si', 'S', 'Cl', 'Br']:
+ tensions.append(tension)
+ continue
+
+ if sym_i == 'H':
+ dt_HC = np.zeros([natm,3])
+ dt_HO = np.zeros([natm,3])
+ for j, sym_j in enumerate(symbols):
+ if sym_j == 'C':
+ r, dr = r_zz.get(('H','C'), (0.0, 0.0))
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_HC[i] += dt_drij
+ dt_HC[j] -= dt_drij
+ if sym_j == 'O':
+ r, dr = r_zz.get(('H','O'), (0.0, 0.0))
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_HO[i] += dt_drij
+ dt_HO[j] -= dt_drij
+ sig_HC = get_bond_tension(('H','C'))
+ sig_HO = get_bond_tension(('H','O'))
+ tension += sig_HC * dt_HC + sig_HO * dt_HO
+ tensions.append(tension)
+ continue
+
+ if sym_i == 'C':
+ dt_CC = np.zeros([natm,3])
+ dt_CN = np.zeros([natm,3])
+ t_CN = 0.0
+ for j, sym_j in enumerate(symbols):
+ if sym_j == 'C' and i != j:
+ r, dr = r_zz.get(('C', 'C'), (0.0, 0.0))
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_CC[i] += dt_drij
+ dt_CC[j] -= dt_drij
+ if sym_j == 'N':
+ r, dr = r_zz.get(('C', 'N'), (0.0, 0.0))
+ t_CN += swtich_function(rij[i,j], r, dr)
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_CN[i] += dt_drij
+ dt_CN[j] -= dt_drij
+ sig_CC = get_bond_tension(('C','C'))
+ sig_CN = get_bond_tension(('C','N'))
+ tension += sig_CC * dt_CC + sig_CN * (2 * t_CN * dt_CN)
+ tensions.append(tension)
+ continue
+
+ if sym_i == 'N':
+ t_NC = 0.0
+ dt_NC = np.zeros([natm,3])
+ dt_NC3 = np.zeros([natm,3])
+ for j, sym_j in enumerate(symbols):
+ if sym_j == 'C':
+ r, dr = r_zz.get(('N','C'), (0.0, 0.0))
+ tk = 0.0
+ dtk = np.zeros([natm,3])
+ for k, sym_k in enumerate(symbols):
+ if k != i and k != j:
+ rjk, drjk = r_zz.get(('C', sym_k), (0.0, 0.0))
+ tk += swtich_function(rij[j,k], rjk, drjk)
+ dtk_rjk = grad_swtich_function(rij[j,k], rjk, drjk) * drij[j,k]
+ dtk[j] += dtk_rjk
+ dtk[k] -= dtk_rjk
+
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] * tk**2
+ dt_NC[i] += dt_drij
+ dt_NC[j] -= dt_drij
+
+ t = swtich_function(rij[i,j], r, dr)
+ dt_NC += t * (2 * tk * dtk)
+ t_NC += t * tk**2
+
+ r, dr = r_zz.get(('N','C3'), (0.0, 0.0))
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_NC3[i] += dt_drij
+ dt_NC3[j] -= dt_drij
+ sig_NC = get_bond_tension(('N','C'))
+ sig_NC3= get_bond_tension(('N','C3'))
+ tension += sig_NC * (1.3 * t_NC**0.3 * dt_NC) + sig_NC3 * dt_NC3
+ tensions.append(tension)
+ continue
+
+ if sym_i == 'O':
+ dt_OC = np.zeros([natm,3])
+ dt_ON = np.zeros([natm,3])
+ dt_OO = np.zeros([natm,3])
+ dt_OP = np.zeros([natm,3])
+ for j, sym_j in enumerate(symbols):
+ if sym_j == 'C':
+ r, dr = r_zz.get(('O','C'), (0.0, 0.0))
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_OC[i] += dt_drij
+ dt_OC[j] -= dt_drij
+ if sym_j == 'N':
+ r, dr = r_zz.get(('O','N'), (0.0, 0.0))
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_ON[i] += dt_drij
+ dt_ON[j] -= dt_drij
+ if sym_j == 'O' and j != i:
+ r, dr = r_zz.get(('O','O'), (0.0, 0.0))
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_OO[i] += dt_drij
+ dt_OO[j] -= dt_drij
+ if sym_j == 'P':
+ r, dr = r_zz.get(('O','P'), (0.0, 0.0))
+ dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j]
+ dt_OP[i] += dt_drij
+ dt_OP[j] -= dt_drij
+ sig_OC = get_bond_tension(('O','C'))
+ sig_ON = get_bond_tension(('O','N'))
+ sig_OO = get_bond_tension(('O','O'))
+ sig_OP = get_bond_tension(('O','P'))
+ tension += sig_OC * dt_OC + sig_ON * dt_ON + sig_OO * dt_OO + sig_OP * dt_OP
+ tensions.append(tension)
+ continue
+
+ return cupy.asarray(tensions)
+
+def get_cds(smdobj):
+ mol = smdobj.mol
+ n, _, alpha, beta, gamma, _, phi, psi = smdobj.solvent_descriptors
+ symbols = [mol.atom_symbol(ia) for ia in range(mol.natm)]
+ coords = mol.atom_coords(unit='A')
+ if smdobj._solvent.lower() != 'water':
+ grad_tension = atomic_surface_tension(symbols, coords, n, alpha, beta, water=False)
+ atm_tension = smd.atomic_surface_tension(symbols, coords, n, alpha, beta, water=False)
+ mol_tension = smd.molecular_surface_tension(beta, gamma, phi, psi)
+ else:
+ grad_tension = atomic_surface_tension(symbols, coords, n, alpha, beta, water=True)
+ atm_tension = smd.atomic_surface_tension(symbols, coords, n, alpha, beta, water=True)
+ mol_tension = 0.0
+
+ # generate surface for calculating SASA
+ rad = radii.VDW + 0.4/radii.BOHR
+ surface = pcm.gen_surface(mol, ng=smdobj.sasa_ng, rad=rad)
+ _, grad_area = pcm_grad.get_dF_dA(surface)
+ area = surface['area']
+ gridslice = surface['gslice_by_atom']
+ SASA = cupy.asarray([cupy.sum(area[p0:p1], axis=0) for p0,p1, in gridslice]).get()
+ grad_SASA = cupy.asarray([cupy.sum(grad_area[p0:p1], axis=0) for p0,p1, in gridslice]).get()
+ SASA *= radii.BOHR**2
+ grad_SASA *= radii.BOHR**2
+ mol_cds = mol_tension * np.sum(grad_SASA, axis=0) / 1000
+ grad_tension *= radii.BOHR
+ atm_cds = np.einsum('i,ijx->jx', SASA, grad_tension.get()) / 1000
+ atm_cds+= np.einsum('i,ijx->jx', atm_tension.get(), grad_SASA) / 1000
+ return (mol_cds + atm_cds)/hartree2kcal # hartree
+
+def make_grad_object(grad_method):
+ '''For grad_method in vacuum, add nuclear gradients of solvent pcmobj'''
+ if grad_method.base.with_solvent.frozen:
+ raise RuntimeError('Frozen solvent model is not avialbe for energy gradients')
+
+ name = (grad_method.base.with_solvent.__class__.__name__
+ + grad_method.__class__.__name__)
+ return lib.set_class(WithSolventGrad(grad_method),
+ (WithSolventGrad, grad_method.__class__), name)
+
+class WithSolventGrad:
+ _keys = {'de_solvent', 'de_solute'}
+
+ def __init__(self, grad_method):
+ self.__dict__.update(grad_method.__dict__)
+ self.de_solvent = None
+ self.de_solute = None
+
+ def undo_solvent(self):
+ cls = self.__class__
+ name_mixin = self.base.with_solvent.__class__.__name__
+ obj = lib.view(self, lib.drop_class(cls, WithSolventGrad, name_mixin))
+ del obj.de_solvent
+ del obj.de_solute
+ return obj
+
+ def kernel(self, *args, dm=None, atmlst=None, **kwargs):
+ dm = kwargs.pop('dm', None)
+ if dm is None:
+ dm = self.base.make_rdm1(ao_repr=True)
+
+ self.de_solute = super().kernel(*args, **kwargs)
+ self.de_solvent = pcm_grad.grad_qv(self.base.with_solvent, dm)
+ self.de_solvent+= pcm_grad.grad_solver(self.base.with_solvent, dm)
+ self.de_solvent+= pcm_grad.grad_nuc(self.base.with_solvent, dm)
+
+ self.de = self.de_solute + self.de_solvent
+ self.de += get_cds(self.base.with_solvent)
+ if self.verbose >= logger.NOTE:
+ logger.note(self, '--------------- %s (+%s) gradients ---------------',
+ self.base.__class__.__name__,
+ self.base.with_solvent.__class__.__name__)
+ rhf_grad._write(self, self.mol, self.de, self.atmlst)
+ logger.note(self, '----------------------------------------------')
+ return self.de
+
+ def _finalize(self):
+ # disable _finalize. It is called in grad_method.kernel method
+ # where self.de was not yet initialized.
+ pass
+
+
diff --git a/gpu4pyscf/solvent/hessian/pcm.py b/gpu4pyscf/solvent/hessian/pcm.py
index ae28942c..4c5c4ba7 100644
--- a/gpu4pyscf/solvent/hessian/pcm.py
+++ b/gpu4pyscf/solvent/hessian/pcm.py
@@ -131,7 +131,11 @@ def pcm_grad_scanner(mol):
pcmobj.reset(mol)
e, v = pcmobj._get_vind(dm)
#return grad_elec(pcmobj, dm)
- return grad_nuc(pcmobj, dm) + grad_solver(pcmobj, dm) + grad_qv(pcmobj, dm)
+ pcm_grad = grad_nuc(pcmobj, dm)
+ pcm_grad+= grad_solver(pcmobj, dm)
+ pcm_grad+= grad_qv(pcmobj, dm)
+ return pcm_grad
+
mol.verbose = 0
de = numpy.zeros([mol.natm, mol.natm, 3, 3])
eps = 1e-3
@@ -224,44 +228,49 @@ def analytic_grad_vmat(pcmobj, mo_coeff, mo_occ, atmlst=None, verbose=None):
pcmobj.reset(pmol)
return vmat
"""
+
def make_hess_object(hess_method):
- '''
- return solvent hessian object
- '''
- hess_method_class = hess_method.__class__
- class WithSolventHess(hess_method_class):
- def __init__(self, hess_method):
- self.__dict__.update(hess_method.__dict__)
- self.de_solvent = None
- self.de_solute = None
- self._keys = self._keys.union(['de_solvent', 'de_solute'])
-
- def kernel(self, *args, dm=None, atmlst=None, **kwargs):
- dm = kwargs.pop('dm', None)
- if dm is None:
- dm = self.base.make_rdm1(ao_repr=True)
- is_equilibrium = self.base.with_solvent.equilibrium_solvation
- self.base.with_solvent.equilibrium_solvation = True
- self.de_solvent = hess_elec(self.base.with_solvent, dm, verbose=self.verbose)
- #self.de_solvent+= hess_nuc(self.base.with_solvent)
- self.de_solute = hess_method_class.kernel(self, *args, **kwargs)
- self.de = self.de_solute + self.de_solvent
- self.base.with_solvent.equilibrium_solvation = is_equilibrium
- return self.de
-
- def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
- if atmlst is None:
- atmlst = range(self.mol.natm)
- h1ao = hess_method_class.make_h1(self, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose)
- dv = fd_grad_vmat(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose)
- for i0, ia in enumerate(atmlst):
- h1ao[i0] += dv[i0]
- return h1ao
-
- def _finalize(self):
- # disable _finalize. It is called in grad_method.kernel method
- # where self.de was not yet initialized.
- pass
-
- return WithSolventHess(hess_method)
+ if hess_method.base.with_solvent.frozen:
+ raise RuntimeError('Frozen solvent model is not avialbe for energy hessian')
+
+ name = (hess_method.base.with_solvent.__class__.__name__
+ + hess_method.__class__.__name__)
+ return lib.set_class(WithSolventHess(hess_method),
+ (WithSolventHess, hess_method.__class__), name)
+
+class WithSolventHess:
+ _keys = {'de_solvent', 'de_solute'}
+
+ def __init__(self, hess_method):
+ self.__dict__.update(hess_method.__dict__)
+ self.de_solvent = None
+ self.de_solute = None
+
+ def kernel(self, *args, dm=None, atmlst=None, **kwargs):
+ dm = kwargs.pop('dm', None)
+ if dm is None:
+ dm = self.base.make_rdm1(ao_repr=True)
+ is_equilibrium = self.base.with_solvent.equilibrium_solvation
+ self.base.with_solvent.equilibrium_solvation = True
+ self.de_solvent = hess_elec(self.base.with_solvent, dm, verbose=self.verbose)
+ #self.de_solvent+= hess_nuc(self.base.with_solvent)
+ self.de_solute = super().kernel(*args, **kwargs)
+ self.de = self.de_solute + self.de_solvent
+ self.base.with_solvent.equilibrium_solvation = is_equilibrium
+ return self.de
+
+ def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
+ if atmlst is None:
+ atmlst = range(self.mol.natm)
+ h1ao = super().make_h1(mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose)
+ dv = fd_grad_vmat(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose)
+ for i0, ia in enumerate(atmlst):
+ h1ao[i0] += dv[i0]
+ return h1ao
+
+ def _finalize(self):
+ # disable _finalize. It is called in grad_method.kernel method
+ # where self.de was not yet initialized.
+ pass
+
diff --git a/gpu4pyscf/solvent/hessian/smd.py b/gpu4pyscf/solvent/hessian/smd.py
new file mode 100644
index 00000000..af41595b
--- /dev/null
+++ b/gpu4pyscf/solvent/hessian/smd.py
@@ -0,0 +1,303 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+'''
+Gradient of PCM family solvent model
+'''
+# pylint: disable=C0103
+
+import numpy as np
+import cupy
+#from cupyx import scipy, jit
+from pyscf import lib
+from pyscf import gto, df
+from pyscf.grad import rhf as rhf_grad
+from pyscf.data import radii
+from pyscf.solvent import ddcosmo_grad
+
+from gpu4pyscf.solvent import pcm, smd
+from gpu4pyscf.solvent.grad import pcm as pcm_grad
+from gpu4pyscf.solvent.grad import smd as smd_grad
+from gpu4pyscf.solvent.hessian import pcm as pcm_hess
+
+from gpu4pyscf.solvent.smd import (
+ sigma_water, sigma_n, sigma_alpha, sigma_beta, r_zz, swtich_function,
+ hartree2kcal)
+from gpu4pyscf.df import int3c2e
+from gpu4pyscf.lib.cupy_helper import contract
+from gpu4pyscf.lib import logger
+
+def hess_swtich_function(R, r, dr):
+ if R < r + dr:
+ dist = (R[0]**2 + R[1]**2 + R[2]**2)**.5
+ hess = [
+ [R[1]**2+R[2]**2, -R[0]*R[1], -R[0]*R[2]],
+ [-R[0]*R[1], R[0]**2+R[2]**2, -R[1]*R[2]],
+ [-R[0]*R[2], -R[1]*R[2], R[0]**2+R[1]**2]]
+ return np.asarray(hess)/dist
+ else:
+ return np.zeros([3,3])
+
+def atomic_surface_tension(symbols, coords, n, alpha, beta, water=True):
+ '''
+ TODO: debug later
+ - list of atomic symbols
+ - atomic coordinates in Anstrong
+ - solvent descriptors: n, alpha, beta
+ '''
+
+ def get_bond_tension(bond):
+ if water:
+ return sigma_water.get(bond, 0.0)
+ t = 0.0
+ t += sigma_n.get(bond, 0.0) * n
+ t += sigma_alpha.get(bond, 0.0) * alpha
+ t += sigma_beta.get(bond, 0.0) * beta
+ return t
+
+ def get_atom_tension(sym_i):
+ if water:
+ return sigma_water.get(sym_i, 0.0)
+ t = 0.0
+ t += sigma_n.get(sym_i, 0.0) * n
+ t += sigma_alpha.get(sym_i, 0.0) * alpha
+ t += sigma_beta.get(sym_i, 0.0) * beta
+ return t
+ natm = coords.shape[0]
+ ri_rj = coords[:,None,:] - coords[None,:,:]
+ rij = np.sum(ri_rj**2, axis=2)**0.5
+ np.fill_diagonal(rij, 1)
+ #drij = ri_rj / np.expand_dims(rij, axis=-1)
+ tensions = []
+ for i, sym_i in enumerate(symbols):
+ if sym_i not in ['H', 'C', 'N', 'O', 'F', 'Si', 'S', 'Cl', 'Br']:
+ tensions.append(np.zeros([natm,3]))
+ continue
+
+ tension = np.zeros([natm,3])
+ if sym_i in ['F', 'Si', 'S', 'Cl', 'Br']:
+ tensions.append(tension)
+ continue
+
+ if sym_i == 'H':
+ dt_HC = np.zeros([natm,natm,3,3])
+ dt_HO = np.zeros([natm,natm,3,3])
+ for j, sym_j in enumerate(symbols):
+ if sym_j == 'C':
+ r, dr = r_zz.get(('H','C'), (0.0, 0.0))
+ dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ dt_HC[i,i] += dt_drij
+ dt_HC[i,j] -= dt_drij
+ dt_HC[j,i] -= dt_drij
+ dt_HC[j,j] += dt_drij
+ if sym_j == 'O':
+ r, dr = r_zz.get(('H','O'), (0.0, 0.0))
+ dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ dt_HO[i,i] += dt_drij
+ dt_HO[i,j] -= dt_drij
+ dt_HO[j,i] -= dt_drij
+ dt_HO[j,j] += dt_drij
+ sig_HC = get_bond_tension(('H','C'))
+ sig_HO = get_bond_tension(('H','O'))
+ tension += sig_HC * dt_HC + sig_HO * dt_HO
+ tensions.append(tension)
+
+ if sym_i == 'C':
+ dt_CN = np.zeros([natm,3])
+ d2t_CC = np.zeros([natm,natm,3,3])
+ d2t_CN = np.zeros([natm,natm,3,3])
+ t_CN = 0.0
+ for j, sym_j in enumerate(symbols):
+ if sym_j == 'C' and i != j:
+ r, dr = r_zz.get(('C', 'C'), (0.0, 0.0))
+ d2t_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ d2t_CC[i,i] += d2t_drij
+ d2t_CC[i,j] -= d2t_drij
+ d2t_CC[j,i] -= d2t_drij
+ d2t_CC[j,j] += d2t_drij
+ if sym_j == 'N':
+ r, dr = r_zz.get(('C', 'N'), (0.0, 0.0))
+ t_CN += swtich_function(rij[i,j], r, dr)
+ dt_drij = smd_grad.grad_switch_function(coords[i]-coords[j], r, dr)
+ dt_CN[i] += dt_drij
+ dt_CN[j] -= dt_drij
+ d2t_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ d2t_CN[i,i] += d2t_drij
+ d2t_CN[i,j] -= d2t_drij
+ d2t_CN[j,i] -= d2t_drij
+ d2t_CN[j,j] += d2t_drij
+ sig_CC = get_bond_tension(('C','C'))
+ sig_CN = get_bond_tension(('C','N'))
+ tension += sig_CC * d2t_CC + sig_CN * (2 * t_CN * d2t_CN + 2 * np.einsum('i,j->ij', dt_drij[i], dt_drij[j]))
+ tensions.append(tension)
+
+ if sym_i == 'N':
+ t_NC = 0.0
+ dt_NC = np.zeros([natm,natm,3,3])
+ dt_NC3 = np.zeros([natm,natm,3,3])
+ for j, sym_j in enumerate(symbols):
+ if sym_j == 'C':
+ r, dr = r_zz.get(('N','C'), (0.0, 0.0))
+ tk = 0.0
+ dtk = np.zeros([natm,natm,3,3])
+ for k, sym_k in enumerate(symbols):
+ if k != i and k != j:
+ rjk, drjk = r_zz.get(('C', sym_k), (0.0, 0.0))
+ tk += swtich_function(rij[j,k], rjk, drjk)
+ dtk_rjk = hess_swtich_function(coords[j]-coords[k], rjk, drjk)
+ dtk[j,j] += dtk_rjk
+ dtk[j,k] -= dtk_rjk
+ dtk[k,j] -= dtk_rjk
+ dtk[k,k] += dtk_rjk
+ dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) * tk**2
+ dt_NC[i,i] += dt_drij
+ dt_NC[i,j] -= dt_drij
+ dt_NC[j,i] -= dt_drij
+ dt_NC[j,j] += dt_drij
+ t = swtich_function(coords[i]-coords[j], r, dr)
+ dt_NC += t * (2 * tk * dtk)
+ t_NC += t * tk**2
+
+ r, dr = r_zz.get(('N','C3'), (0.0, 0.0))
+ dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ dt_NC3[i,i] += dt_drij
+ dt_NC3[i,j] -= dt_drij
+ dt_NC3[j,i] -= dt_drij
+ dt_NC3[j,j] += dt_drij
+ sig_NC = get_bond_tension(('N','C'))
+ sig_NC3= get_bond_tension(('N','C3'))
+ tension += sig_NC * (1.3 * t_NC**0.3 * dt_NC) + sig_NC3 * dt_NC3
+ tensions.append(tension)
+
+ if sym_i == 'O':
+ dt_OC = np.zeros([natm,natm,3,3])
+ dt_ON = np.zeros([natm,natm,3,3])
+ dt_OO = np.zeros([natm,natm,3,3])
+ dt_OP = np.zeros([natm,natm,3,3])
+ for j, sym_j in enumerate(symbols):
+ if sym_j == 'C':
+ r, dr = r_zz.get(('O','C'), (0.0, 0.0))
+ dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ dt_OC[i,i] += dt_drij
+ dt_OC[i,j] -= dt_drij
+ dt_OC[j,i] -= dt_drij
+ dt_OC[j,j] += dt_drij
+ if sym_j == 'N':
+ r, dr = r_zz.get(('O','N'), (0.0, 0.0))
+ dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ dt_ON[i,i] += dt_drij
+ dt_ON[i,j] -= dt_drij
+ dt_ON[j,i] -= dt_drij
+ dt_ON[j,j] += dt_drij
+ if sym_j == 'O' and j != i:
+ r, dr = r_zz.get(('O','O'), (0.0, 0.0))
+ dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ dt_OO[i,i] += dt_drij
+ dt_OO[i,j] -= dt_drij
+ dt_OO[j,i] -= dt_drij
+ dt_OO[j,j] += dt_drij
+ if sym_j == 'P':
+ r, dr = r_zz.get(('O','P'), (0.0, 0.0))
+ dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr)
+ dt_OP[i,i] += dt_drij
+ dt_OP[i,j] -= dt_drij
+ dt_OP[j,i] -= dt_drij
+ dt_OP[j,j] += dt_drij
+ sig_OC = get_bond_tension(('O','C'))
+ sig_ON = get_bond_tension(('O','N'))
+ sig_OO = get_bond_tension(('O','O'))
+ sig_OP = get_bond_tension(('O','P'))
+ tension += sig_OC * dt_OC + sig_ON * dt_ON + sig_OO * dt_OO + sig_OP * dt_OP
+ tensions.append(tension)
+ return cupy.asarray(tensions)
+
+def get_cds(smdobj):
+ mol = smdobj.mol
+ solvent = smdobj.solvent
+ def smd_grad_scanner(mol):
+ smdobj_tmp = smd.SMD(mol)
+ smdobj_tmp.solvent = solvent
+ return smd_grad.get_cds(smdobj_tmp)
+
+ log = logger.new_logger(mol, mol.verbose)
+ t1 = log.init_timer()
+
+ eps = 1e-4
+ natm = mol.natm
+ hess_cds = np.zeros([natm,natm,3,3])
+ for ia in range(mol.natm):
+ for j in range(3):
+ coords = mol.atom_coords(unit='B')
+ coords[ia,j] += eps
+ mol.set_geom_(coords, unit='B')
+ mol.build()
+ grad0_cds = smd_grad_scanner(mol)
+
+ coords[ia,j] -= 2.0*eps
+ mol.set_geom_(coords, unit='B')
+ mol.build()
+ grad1_cds = smd_grad_scanner(mol)
+
+ coords[ia,j] += eps
+ mol.set_geom_(coords, unit='B')
+ hess_cds[ia,:,j] = (grad0_cds - grad1_cds) / (2.0 * eps)
+ t1 = log.timer_debug1('solvent energy', *t1)
+ return hess_cds # hartree
+
+def make_hess_object(hess_method):
+ '''For hess_method in vacuum, add nuclear Hessian of solvent smdobj'''
+ if hess_method.base.with_solvent.frozen:
+ raise RuntimeError('Frozen solvent model is not avialbe for energy hessian')
+
+ name = (hess_method.base.with_solvent.__class__.__name__
+ + hess_method.__class__.__name__)
+ return lib.set_class(WithSolventHess(hess_method),
+ (WithSolventHess, hess_method.__class__), name)
+
+class WithSolventHess:
+ _keys = {'de_solvent', 'de_solute'}
+
+ def __init__(self, hess_method):
+ self.__dict__.update(hess_method.__dict__)
+ self.de_solvent = None
+ self.de_solute = None
+
+ def kernel(self, *args, dm=None, atmlst=None, **kwargs):
+ dm = kwargs.pop('dm', None)
+ if dm is None:
+ dm = self.base.make_rdm1(ao_repr=True)
+ is_equilibrium = self.base.with_solvent.equilibrium_solvation
+ self.base.with_solvent.equilibrium_solvation = True
+ self.de_solvent = pcm_hess.hess_elec(self.base.with_solvent, dm, verbose=self.verbose)
+ #self.de_solvent+= hess_nuc(self.base.with_solvent)
+ self.de_solute = super().kernel(*args, **kwargs)
+ self.de = self.de_solute + self.de_solvent
+ self.de += get_cds(self.base.with_solvent)
+ self.base.with_solvent.equilibrium_solvation = is_equilibrium
+ return self.de
+
+ def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
+ if atmlst is None:
+ atmlst = range(self.mol.natm)
+ h1ao = super().make_h1(mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose)
+ dv = pcm_hess.fd_grad_vmat(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose)
+ for i0, ia in enumerate(atmlst):
+ h1ao[i0] += dv[i0]
+ return h1ao
+
+ def _finalize(self):
+ # disable _finalize. It is called in grad_method.kernel method
+ # where self.de was not yet initialized.
+ pass
diff --git a/gpu4pyscf/solvent/pcm.py b/gpu4pyscf/solvent/pcm.py
index 335de1f7..50e3957f 100644
--- a/gpu4pyscf/solvent/pcm.py
+++ b/gpu4pyscf/solvent/pcm.py
@@ -25,7 +25,7 @@
from pyscf import gto, df
from pyscf.dft import gen_grid
from pyscf.data import radii
-from pyscf.solvent import ddcosmo
+from pyscf.solvent import ddcosmo#, _attach_solvent
from gpu4pyscf.solvent import _attach_solvent
from gpu4pyscf.df import int3c2e
from gpu4pyscf.lib import logger
@@ -122,7 +122,8 @@ def gen_surface(mol, ng=302, rad=modified_Bondi, vdw_scale=1.2, r_probe=0.0):
atom_grid = r_vdw * unit_sphere[:,:3] + atom_coords[ia,:]
#riJ = scipy.spatial.distance.cdist(atom_grid[:,:3], atom_coords)
- riJ = cupy.sum((atom_grid[:,None,:] - atom_coords[None,:,:])**2, axis=2)**0.5
+ #riJ = cupy.sum((atom_grid[:,None,:] - atom_coords[None,:,:])**2, axis=2)**0.5
+ riJ = dist_matrix(atom_grid, atom_coords)
diJ = (riJ - R_in_J) / R_sw_J
diJ[:,ia] = 1.0
diJ[diJ<1e-8] = 0.0
@@ -193,7 +194,7 @@ def get_D_S(surface, with_S=True, with_D=False):
xi_ij = xi_i * xi_j / (xi_i**2 + xi_j**2)**0.5
#rij = scipy.spatial.distance.cdist(grid_coords, grid_coords)
#rij = cupy.sum((grid_coords[:,None,:] - grid_coords[None,:,:])**2, axis=2)**0.5
- rij = dist_matrix(grid_coords)
+ rij = dist_matrix(grid_coords, grid_coords)
xi_r_ij = xi_ij * rij
cupy.fill_diagonal(rij, 1)
S = scipy.special.erf(xi_r_ij) / rij
@@ -201,8 +202,13 @@ def get_D_S(surface, with_S=True, with_D=False):
D = None
if with_D:
- drij = cupy.expand_dims(grid_coords, axis=1) - grid_coords
- nrij = cupy.sum(drij * norm_vec, axis=-1)
+ #drij = cupy.expand_dims(grid_coords, axis=1) - grid_coords
+ #nri = cupy.sum(grid_coords * norm_vec, axis=-1)
+ #nrij = cupy.expand_dims(nri, axis=1) - nri
+ #cupy.expand_dims(grid_coords, axis=1) * norm_vec
+ nrij = grid_coords.dot(norm_vec.T) - cupy.sum(grid_coords * norm_vec, axis=-1)
+
+ #nrij = cupy.sum(drij * norm_vec, axis=-1)
D = S*nrij/rij**2 -2.0*xi_r_ij/PI**0.5*cupy.exp(-xi_r_ij**2)*nrij/rij**3
cupy.fill_diagonal(D, -charge_exp * (2.0 / PI)**0.5 / (2.0 * R_vdw))
diff --git a/gpu4pyscf/solvent/smd.py b/gpu4pyscf/solvent/smd.py
index 5fb5a8e0..1cc79f98 100644
--- a/gpu4pyscf/solvent/smd.py
+++ b/gpu4pyscf/solvent/smd.py
@@ -18,12 +18,14 @@
'''
import numpy as np
+import scipy
import cupy
from pyscf import lib
from pyscf.data import radii
from pyscf.dft import gen_grid
from gpu4pyscf.solvent import pcm, _attach_solvent
from gpu4pyscf.lib import logger
+from gpu4pyscf.lib.cupy_helper import dist_matrix
@lib.with_doc(_attach_solvent._for_scf.__doc__)
def smd_for_scf(mf, solvent_obj=None, dm=None):
@@ -353,17 +355,13 @@ def get_atom_tension(sym_i):
t += sigma_beta.get(sym_i, 0.0) * beta
return t
- rij = cupy.sum((coords[:,None,:] - coords[None,:,:])**2, axis=2)**0.5
+ rij = scipy.spatial.distance.cdist(coords, coords)
tensions = []
for i, sym_i in enumerate(symbols):
if sym_i not in ['H', 'C', 'N', 'O', 'F', 'Si', 'S', 'Cl', 'Br']:
tensions.append(0)
continue
- #sig_n = sigma_n.get(sym_i, 0.0)
- #sig_a = sigma_alpha.get(sym_i, 0.0)
- #sig_b = sigma_beta.get(sym_i, 0.0)
- #tension = sig_n * n + sig_a * alpha + sig_b * beta
tension = get_atom_tension(sym_i)
if sym_i in ['F', 'Si', 'S', 'Cl', 'Br']:
tensions.append(tension)
@@ -374,50 +372,53 @@ def get_atom_tension(sym_i):
t_HO = 0.0
for j, sym_j in enumerate(symbols):
if sym_j == 'C':
- r, dr = r_zz['H','C']
+ r, dr = r_zz.get(('H','C'), (0.0, 0.0))
t_HC += swtich_function(rij[i,j], r, dr)
if sym_j == 'O':
- r, dr = r_zz['H','O']
+ r, dr = r_zz.get(('H','O'), (0.0, 0.0))
t_HO += swtich_function(rij[i,j], r, dr)
sig_HC = get_bond_tension(('H','C'))
sig_HO = get_bond_tension(('H','O'))
tension += sig_HC * t_HC + sig_HO * t_HO
tensions.append(tension)
+ continue
if sym_i == 'C':
t_CC = 0.0
t_CN = 0.0
for j, sym_j in enumerate(symbols):
if sym_j == 'C' and i != j:
- r, dr = r_zz['C', 'C']
+ r, dr = r_zz.get(('C', 'C'), (0.0, 0.0))
t_CC += swtich_function(rij[i,j], r, dr)
if sym_j == 'N':
- r, dr = r_zz['C', 'N']
+ r, dr = r_zz.get(('C', 'N'), (0.0, 0.0))
t_CN += swtich_function(rij[i,j], r, dr)
sig_CC = get_bond_tension(('C','C'))
sig_CN = get_bond_tension(('C','N'))
tension += sig_CC * t_CC + sig_CN * t_CN**2
tensions.append(tension)
+ continue
if sym_i == 'N':
t_NC = 0.0
t_NC3 = 0.0
for j, sym_j in enumerate(symbols):
if sym_j == 'C':
- r, dr = r_zz['N','C']
+ r, dr = r_zz.get(('N','C'), (0.0,0.0))
tk = 0.0
for k, sym_k in enumerate(symbols):
if k != i and k != j:
- rjk, drjk = r_zz['C', sym_k]
+ rjk, drjk = r_zz.get(('C', sym_k), (0.0,0.0))
tk += swtich_function(rij[j,k], rjk, drjk)
t_NC += swtich_function(rij[i,j], r, dr) * tk**2
- r, dr = r_zz['N','C3']
+ r, dr = r_zz.get(('N','C3'), (0.0, 0.0))
t_NC3 += swtich_function(rij[i,j], r, dr)
sig_NC = get_bond_tension(('N','C'))
sig_NC3= get_bond_tension(('N','C3'))
tension += sig_NC * t_NC**1.3 + sig_NC3 * t_NC3
tensions.append(tension)
+ continue
if sym_i == 'O':
t_OC = 0.0
@@ -426,16 +427,16 @@ def get_atom_tension(sym_i):
t_OP = 0.0
for j, sym_j in enumerate(symbols):
if sym_j == 'C':
- r, dr = r_zz['O','C']
+ r, dr = r_zz.get(('O','C'), (0.0, 0.0))
t_OC += swtich_function(rij[i,j], r, dr)
if sym_j == 'N':
- r, dr = r_zz['O','N']
+ r, dr = r_zz.get(('O','N'), (0.0, 0.0))
t_ON += swtich_function(rij[i,j], r, dr)
if sym_j == 'O' and j != i:
- r, dr = r_zz['O','O']
+ r, dr = r_zz.get(('O','O'), (0.0, 0.0))
t_OO += swtich_function(rij[i,j], r, dr)
if sym_j == 'P':
- r, dr = r_zz['O','P']
+ r, dr = r_zz.get(('O','P'), (0.0, 0.0))
t_OP += swtich_function(rij[i,j], r, dr)
sig_OC = get_bond_tension(('O','C'))
sig_ON = get_bond_tension(('O','N'))
@@ -443,6 +444,7 @@ def get_atom_tension(sym_i):
sig_OP = get_bond_tension(('O','P'))
tension += sig_OC * t_OC + sig_ON * t_ON + sig_OO * t_OO + sig_OP * t_OP
tensions.append(tension)
+ continue
return cupy.asarray(tensions)
def molecular_surface_tension(beta, gamma, phi, psi):
@@ -486,23 +488,23 @@ def get_cds(smdobj):
# generate surface for calculating SASA
rad = radii.VDW + 0.4/radii.BOHR
- surface = pcm.gen_surface(mol, ng=302, rad=rad)
+ surface = pcm.gen_surface(mol, ng=smdobj.sasa_ng, rad=rad)
area = surface['area']
gridslice = surface['gslice_by_atom']
SASA = cupy.asarray([cupy.sum(area[p0:p1], axis=0) for p0,p1, in gridslice])
SASA *= radii.BOHR**2
mol_cds = mol_tension * cupy.sum(SASA) / 1000 # in kcal/mol
atm_cds = cupy.sum(SASA * atm_tension) / 1000 # in kcal/mol
-
return (mol_cds + atm_cds)/hartree2kcal # hartree
class SMD(pcm.PCM):
_keys = {
- 'intopt', 'method', 'e_cds', 'solvent_descriptors', 'r_probe'
+ 'intopt', 'method', 'e_cds', 'solvent_descriptors', 'r_probe', 'sasa_ng'
}
def __init__(self, mol, solvent=''):
super().__init__(mol)
self.vdw_scale = 1.0
+ self.sasa_ng = 590 # quadrature grids for calculating SASA
self.r_probe = 0.4/radii.BOHR
self.method = 'SMD' # use IEFPCM for electrostatic
if solvent not in solvent_db:
@@ -566,10 +568,24 @@ def get_cds(self):
return get_cds(self)
def nuc_grad_method(self, grad_method):
- raise RuntimeError('SMD gradient is not implemented')
+ from gpu4pyscf.solvent.grad import smd as smd_grad
+ if self.frozen:
+ raise RuntimeError('Frozen solvent model is not supported')
+ from gpu4pyscf import scf
+ if isinstance(grad_method.base, scf.hf.RHF):
+ return smd_grad.make_grad_object(grad_method)
+ else:
+ raise RuntimeError('Only SCF gradient is supported')
def Hessian(self, hess_method):
- raise RuntimeError('SMD Hessian is not implemented')
+ from gpu4pyscf.solvent.hessian import smd as smd_hess
+ if self.frozen:
+ raise RuntimeError('Frozen solvent model is not supported')
+ from gpu4pyscf import scf
+ if isinstance(hess_method.base, scf.hf.RHF):
+ return smd_hess.make_hess_object(hess_method)
+ else:
+ raise RuntimeError('Only SCF gradient is supported')
def reset(self, mol=None):
super().reset(mol)
diff --git a/gpu4pyscf/solvent/tests/test_smd.py b/gpu4pyscf/solvent/tests/test_smd.py
index 45e2028a..b322f4ce 100644
--- a/gpu4pyscf/solvent/tests/test_smd.py
+++ b/gpu4pyscf/solvent/tests/test_smd.py
@@ -40,20 +40,23 @@ def tearDownModule():
class KnownValues(unittest.TestCase):
def test_cds_solvent(self):
smdobj = smd.SMD(mol)
+ smdobj.sasa_ng = 590
smdobj.solvent = 'toluene'
e_cds = smdobj.get_cds()
- assert numpy.abs(e_cds - -0.0013476060879874362) < 1e-8
+ assert numpy.abs(e_cds - -0.0013476016530476354) < 1e-8
def test_cds_water(self):
smdobj = smd.SMD(mol)
+ smdobj.sasa_ng = 590
smdobj.solvent = 'water'
e_cds = smdobj.get_cds()
- assert numpy.abs(e_cds - 0.0022847142144050057) < 1e-8
+ assert numpy.abs(e_cds - 0.0022903044356530726) < 1e-8
def test_smd_solvent(self):
mf = scf.RHF(mol)
mf = mf.SMD()
mf.with_solvent.solvent = 'ethanol'
+ mf.with_solvent.sasa_ng = 590
e_tot = mf.kernel()
assert numpy.abs(e_tot - -76.075066568) < 2e-4
@@ -61,9 +64,12 @@ def test_smd_water(self):
mf = scf.RHF(mol)
mf = mf.SMD()
mf.with_solvent.solvent = 'water'
+ mf.with_solvent.sasa_ng = 590
e_tot = mf.kernel()
assert numpy.abs(e_tot - -76.0756052903) < 2e-4
+ # TODO: add more test for other molecules
+
if __name__ == "__main__":
print("Full Tests for SMDs")
unittest.main()
\ No newline at end of file
diff --git a/gpu4pyscf/solvent/tests/test_smd_grad.py b/gpu4pyscf/solvent/tests/test_smd_grad.py
new file mode 100644
index 00000000..ec8173f7
--- /dev/null
+++ b/gpu4pyscf/solvent/tests/test_smd_grad.py
@@ -0,0 +1,87 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import unittest
+import numpy
+from pyscf import gto
+from gpu4pyscf import scf
+from gpu4pyscf.solvent import pcm
+from gpu4pyscf.solvent.grad import pcm as pcm_grad
+from gpu4pyscf.solvent.grad import smd as smd_grad
+from gpu4pyscf.solvent import smd
+
+def setUpModule():
+ global mol
+ mol = gto.Mole()
+ mol.atom = '''P 0.000 0.000 0.000
+O 1.500 0.000 0.000
+O -1.500 0.000 0.000
+O 0.000 1.500 0.000
+O 0.000 -1.500 0.000
+H 1.000 1.000 0.000
+H -1.000 -1.000 0.000
+H 0.000 -2.000 0.000
+'''
+ mol.basis = 'sto3g'
+ mol.output = '/dev/null'
+ mol.build(verbose=0)
+
+def tearDownModule():
+ global mol
+ mol.stdout.close()
+ del mol
+
+def _check_grad(mol, solvent='water'):
+ natm = mol.natm
+ fd_cds = numpy.zeros([natm,3])
+ eps = 1e-4
+ for ia in range(mol.natm):
+ for j in range(3):
+ coords = mol.atom_coords(unit='B')
+ coords[ia,j] += eps
+ mol.set_geom_(coords, unit='B')
+ mol.build()
+
+ smdobj = smd.SMD(mol)
+ smdobj.solvent = solvent
+ e0_cds = smdobj.get_cds()
+
+ coords[ia,j] -= 2.0*eps
+ mol.set_geom_(coords, unit='B')
+ mol.build()
+
+ smdobj = smd.SMD(mol)
+ smdobj.solvent = solvent
+ e1_cds = smdobj.get_cds()
+
+ coords[ia,j] += eps
+ mol.set_geom_(coords, unit='B')
+ fd_cds[ia,j] = (e0_cds - e1_cds) / (2.0 * eps)
+
+ smdobj = smd.SMD(mol)
+ smdobj.solvent = solvent
+ grad_cds = smd_grad.get_cds(smdobj)
+ assert numpy.linalg.norm(fd_cds - grad_cds) < 1e-8
+
+class KnownValues(unittest.TestCase):
+ def test_grad_water(self):
+ _check_grad(mol, solvent='water')
+
+ def test_grad_solvent(self):
+ _check_grad(mol, solvent='ethanol')
+
+if __name__ == "__main__":
+ print("Full Tests for Gradient of SMD")
+ unittest.main()
\ No newline at end of file
diff --git a/gpu4pyscf/solvent/tests/test_smd_hessian.py b/gpu4pyscf/solvent/tests/test_smd_hessian.py
new file mode 100644
index 00000000..24193663
--- /dev/null
+++ b/gpu4pyscf/solvent/tests/test_smd_hessian.py
@@ -0,0 +1,58 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import unittest
+import numpy
+from pyscf import gto
+from gpu4pyscf import scf
+from gpu4pyscf.solvent.hessian import smd as smd_hess
+from gpu4pyscf.solvent import smd
+
+def setUpModule():
+ global mol
+ mol = gto.Mole()
+ mol.atom = '''P 0.000 0.000 0.000
+O 1.500 0.000 0.000
+O -1.500 0.000 0.000
+O 0.000 1.500 0.000
+O 0.000 -1.500 0.000
+H 1.000 1.000 0.000
+H -1.000 -1.000 0.000
+H 0.000 -2.000 0.000
+'''
+ mol.basis = 'sto3g'
+ mol.output = '/dev/null'
+ mol.build(verbose=0)
+
+def tearDownModule():
+ global mol
+ mol.stdout.close()
+ del mol
+
+def _check_hess(mol, solvent='water'):
+ smdobj = smd.SMD(mol)
+ smdobj.solvent = solvent
+ smd_hess.get_cds(smdobj)
+
+class KnownValues(unittest.TestCase):
+ def test_hess_water(self):
+ _check_hess(mol, solvent='water')
+
+ def test_hess_solvent(self):
+ _check_hess(mol, solvent='ethanol')
+
+if __name__ == "__main__":
+ print("Full Tests for Hessian of SMD")
+ unittest.main()
\ No newline at end of file