diff --git a/README.md b/README.md index f396f73e..ec2b380a 100644 --- a/README.md +++ b/README.md @@ -50,8 +50,7 @@ Features - Dispersion corrections via [DFTD3](https://github.com/dftd3/simple-dftd3) and [DFTD4](https://github.com/dftd4/dftd4); - Nonlocal functional correction (vv10) for SCF and gradient; - ECP is supported and calculated on CPU; -- PCM solvent models, analytical gradients, and semi-analytical Hessian matrix; -- SMD solvent models and solvation free energy +- PCM models, SMD model, their analytical gradients, and semi-analytical Hessian matrix; Limitations -------- diff --git a/benchmarks/smd/benchmark_smd.py b/benchmarks/smd/benchmark_smd.py new file mode 100644 index 00000000..2b15baaa --- /dev/null +++ b/benchmarks/smd/benchmark_smd.py @@ -0,0 +1,108 @@ +# Copyright 2024 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import os +import numpy +from pyscf import gto +from gpu4pyscf.solvent import smd +from gpu4pyscf.solvent.grad import smd as smd_grad + +''' +Benchmark CDS, gradient, hessian in the SMD model +''' + +path = '../molecules/organic/' + +# calculated with qchem 6.1, in kcal/mol +e_cds_qchem = {} +e_cds_qchem['water'] = { + '020_Vitamin_C.xyz': 5.0737, + '031_Inosine.xyz': 2.7129, + '033_Bisphenol_A.xyz': 6.2620, + '037_Mg_Porphin.xyz': 6.0393, + '042_Penicillin_V.xyz': 6.4349, + '045_Ochratoxin_A.xyz': 8.8526, + '052_Cetirizine.xyz': 4.6430, + '057_Tamoxifen.xyz': 5.4743, + '066_Raffinose.xyz': 10.2543, + '084_Sphingomyelin.xyz': 15.0308, + '095_Azadirachtin.xyz': 16.9321, + '113_Taxol.xyz': 17.2585, + '168_Valinomycin.xyz': 27.3499, +} + +e_cds_qchem['ethanol'] = { + '020_Vitamin_C.xyz': 4.2119, + '031_Inosine.xyz': 1.0175, + '033_Bisphenol_A.xyz': -0.2454, + '037_Mg_Porphin.xyz': -2.2391, + '042_Penicillin_V.xyz': 1.8338, + '045_Ochratoxin_A.xyz': 1.0592, + '052_Cetirizine.xyz': -2.5099, + '057_Tamoxifen.xyz': -3.9320, + '066_Raffinose.xyz': 3.1120, + '084_Sphingomyelin.xyz': -3.1963, + '095_Azadirachtin.xyz': 6.5286, + '113_Taxol.xyz': 2.7271, + '168_Valinomycin.xyz': 4.0013, +} + +def _check_energy_grad(filename, solvent='water'): + xyz = os.path.join(path, filename) + mol = gto.Mole(atom=xyz) + mol.build() + natm = mol.natm + fd_cds = numpy.zeros([natm,3]) + eps = 1e-4 + for ia in range(mol.natm): + for j in range(3): + coords = mol.atom_coords(unit='B') + coords[ia,j] += eps + mol.set_geom_(coords, unit='B') + mol.build() + + smdobj = smd.SMD(mol) + smdobj.solvent = solvent + e0_cds = smdobj.get_cds() + + coords[ia,j] -= 2.0*eps + mol.set_geom_(coords, unit='B') + mol.build() + + smdobj = smd.SMD(mol) + smdobj.solvent = solvent + e1_cds = smdobj.get_cds() + + coords[ia,j] += eps + mol.set_geom_(coords, unit='B') + fd_cds[ia,j] = (e0_cds - e1_cds) / (2.0 * eps) + + smdobj = smd.SMD(mol) + smdobj.solvent = solvent + e_cds = smd.get_cds(smdobj) * smd.hartree2kcal + grad_cds = smd_grad.get_cds(smdobj) + print(f'e_cds by GPU4PySCF: {e_cds}') + print(f'e_cds by Q-Chem: {e_cds_qchem[solvent][filename]}') + print(f'e_cds(Q-Chem) - e_cds(GPU4PySCF): {e_cds - e_cds_qchem[solvent][filename]}') + print(f'norm (fd gradient - analy gradient: {numpy.linalg.norm(fd_cds - grad_cds.get())}') + assert numpy.linalg.norm(fd_cds - grad_cds.get()) < 1e-8 + +if __name__ == "__main__": + for filename in os.listdir(path): + print(f'---- benchmarking {filename} ----------') + print('in water') + _check_energy_grad(filename, solvent='water') + print('in ethanol') + _check_energy_grad(filename, solvent='ethanol') \ No newline at end of file diff --git a/examples/14-pcm_solvent.py b/examples/14-pcm_solvent.py index 8eaabbf8..7fc040c7 100644 --- a/examples/14-pcm_solvent.py +++ b/examples/14-pcm_solvent.py @@ -33,7 +33,7 @@ mf.grids.atom_grid = (99,590) mf.small_rho_cutoff = 1e-10 mf.with_solvent.lebedev_order = 29 # 302 Lebedev grids -mf.with_solvent.method = 'C-PCM' +mf.with_solvent.method = 'IEF-PCM' mf.with_solvent.eps = 78.3553 mf.kernel() diff --git a/examples/16-smd.py b/examples/16-smd.py index 32c5802c..10e8ef63 100644 --- a/examples/16-smd.py +++ b/examples/16-smd.py @@ -24,11 +24,12 @@ H -0.7570000000 -0.0000000000 -0.4696000000 H 0.7570000000 0.0000000000 -0.4696000000 ''' -mol = pyscf.M(atom=atom, basis='def2-tzvpp', verbose=4) +atom = 'Vitamin_C.xyz' +mol = pyscf.M(atom=atom, basis='def2-tzvpp', verbose=6) -mf = dft.rks.RKS(mol, xc='HYB_GGA_XC_B3LYP')#.density_fit() +mf = dft.rks.RKS(mol, xc='HYB_GGA_XC_B3LYP').density_fit() mf = mf.SMD() -mf.verbose = 4 +mf.verbose = 6 mf.grids.atom_grid = (99,590) mf.small_rho_cutoff = 1e-10 mf.with_solvent.lebedev_order = 29 # 302 Lebedev grids @@ -36,3 +37,9 @@ mf.with_solvent.solvent = 'water' e_tot = mf.kernel() print('total energy with SMD:', e_tot) + +gradobj = mf.nuc_grad_method() +f = gradobj.kernel() + +hessobj = mf.Hessian() +h = hessobj.kernel() \ No newline at end of file diff --git a/gpu4pyscf/__init__.py b/gpu4pyscf/__init__.py index ea5e78a2..98dd2059 100644 --- a/gpu4pyscf/__init__.py +++ b/gpu4pyscf/__init__.py @@ -5,3 +5,8 @@ # monkey patch libxc reference due to a bug in nvcc from pyscf.dft import libxc libxc.__reference__ = 'unable to decode the reference due to https://github.com/NVIDIA/cuda-python/issues/29' + +from gpu4pyscf.lib.utils import patch_cpu_kernel +from gpu4pyscf.lib.cupy_helper import tag_array +from pyscf import lib +lib.tag_array = tag_array diff --git a/gpu4pyscf/lib/cupy_helper.py b/gpu4pyscf/lib/cupy_helper.py index 42eea65c..332486ff 100644 --- a/gpu4pyscf/lib/cupy_helper.py +++ b/gpu4pyscf/lib/cupy_helper.py @@ -17,9 +17,11 @@ import os import sys +import functools import numpy as np import cupy import ctypes +from pyscf import lib from gpu4pyscf.lib import logger from gpu4pyscf.gto import mole from gpu4pyscf.lib.cutensor import contract @@ -93,11 +95,22 @@ def device2host_2d(a_cpu, a_gpu, stream=None): class CPArrayWithTag(cupy.ndarray): pass +@functools.wraps(lib.tag_array) def tag_array(a, **kwargs): - ''' attach attributes to cupy ndarray''' - t = cupy.asarray(a).view(CPArrayWithTag) - if isinstance(a, CPArrayWithTag): - t.__dict__.update(a.__dict__) + ''' + a should be cupy/numpy array or tuple of cupy/numpy array + + attach attributes to cupy ndarray for cupy array + attach attributes to numpy ndarray for numpy array + ''' + if isinstance(a, cupy.ndarray) or isinstance(a[0], cupy.ndarray): + t = cupy.asarray(a).view(CPArrayWithTag) + if isinstance(a, CPArrayWithTag): + t.__dict__.update(a.__dict__) + else: + t = np.asarray(a).view(lib.NPArrayWithTag) + if isinstance(a, lib.NPArrayWithTag): + t.__dict__.update(a.__dict__) t.__dict__.update(kwargs) return t @@ -168,19 +181,23 @@ def add_sparse(a, b, indices): raise RuntimeError('failed in sparse_add2d') return a -def dist_matrix(coords, out=None): - assert coords.flags.c_contiguous - n = coords.shape[0] +def dist_matrix(x, y, out=None): + assert x.flags.c_contiguous + assert y.flags.c_contiguous + + m = x.shape[0] + n = y.shape[0] if out is None: - out = cupy.empty([n,n]) + out = cupy.empty([m,n]) stream = cupy.cuda.get_current_stream() err = libcupy_helper.dist_matrix( ctypes.cast(stream.ptr, ctypes.c_void_p), ctypes.cast(out.data.ptr, ctypes.c_void_p), - ctypes.cast(coords.data.ptr, ctypes.c_void_p), - ctypes.cast(coords.data.ptr, ctypes.c_void_p), - ctypes.c_int(n), + ctypes.cast(x.data.ptr, ctypes.c_void_p), + ctypes.cast(y.data.ptr, ctypes.c_void_p), + ctypes.c_int(m), + ctypes.c_int(n) ) if err != 0: raise RuntimeError('failed in calculating distance matrix') @@ -376,10 +393,8 @@ def cart2sph(t, axis=0, ang=1, out=None): t_cart = t.reshape([i0*nli, li_size[0], i3]) if(out is not None): out = out.reshape([i0*nli, li_size[1], i3]) - out[:] = cupy.einsum('min,ip->mpn', t_cart, c2s) - else: - out = cupy.einsum('min,ip->mpn', t_cart, c2s) - return out.reshape(out_shape) + t_sph = contract('min,ip->mpn', t_cart, c2s, out=out) + return t_sph.reshape(out_shape) # a copy with modification from # https://github.com/pyscf/pyscf/blob/9219058ac0a1bcdd8058166cad0fb9127b82e9bf/pyscf/lib/linalg_helper.py#L1536 diff --git a/gpu4pyscf/lib/cupy_helper/dist_matrix.cu b/gpu4pyscf/lib/cupy_helper/dist_matrix.cu index 77f78de7..2b88db4f 100644 --- a/gpu4pyscf/lib/cupy_helper/dist_matrix.cu +++ b/gpu4pyscf/lib/cupy_helper/dist_matrix.cu @@ -19,11 +19,11 @@ #define THREADS 32 __global__ -static void _calc_distances(double *dist, const double *x, const double *y, int n) +static void _calc_distances(double *dist, const double *x, const double *y, int m, int n) { int i = blockIdx.x * blockDim.x + threadIdx.x; int j = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= n || j >= n){ + if (i >= m || j >= n){ return; } @@ -34,12 +34,13 @@ static void _calc_distances(double *dist, const double *x, const double *y, int } extern "C" { -int dist_matrix(cudaStream_t stream, double *dist, const double *x, const double *y, int n) +int dist_matrix(cudaStream_t stream, double *dist, const double *x, const double *y, int m, int n) { - int ntile = (n + THREADS - 1) / THREADS; + int ntilex = (m + THREADS - 1) / THREADS; + int ntiley = (n + THREADS - 1) / THREADS; dim3 threads(THREADS, THREADS); - dim3 blocks(ntile, ntile); - _calc_distances<<>>(dist, x, y, n); + dim3 blocks(ntilex, ntiley); + _calc_distances<<>>(dist, x, y, m, n); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { return 1; diff --git a/gpu4pyscf/lib/gvhf/g3c2e.cuh b/gpu4pyscf/lib/gvhf/g3c2e.cuh index 9195cddd..4a06f1f7 100644 --- a/gpu4pyscf/lib/gvhf/g3c2e.cuh +++ b/gpu4pyscf/lib/gvhf/g3c2e.cuh @@ -673,331 +673,4 @@ static void write_int3c2e_ip2_jk(JKMatrix jk, double *j3, double* k3, int ksh){ } } -__global__ -static void GINTrun_int3c2e_ip1_jk_kernel1000(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets) -{ - int ntasks_ij = offsets.ntasks_ij; - int ntasks_kl = offsets.ntasks_kl; - int task_ij = blockIdx.x * blockDim.x + threadIdx.x; - int task_kl = blockIdx.y * blockDim.y + threadIdx.y; - bool active = true; - if (task_ij >= ntasks_ij || task_kl >= ntasks_kl) { - active = false; - task_ij = 0; - task_kl = 0; - } - int bas_ij = offsets.bas_ij + task_ij; - int bas_kl = offsets.bas_kl + task_kl; - double norm = envs.fac; - double omega = envs.omega; - int *bas_pair2bra = c_bpcache.bas_pair2bra; - int *bas_pair2ket = c_bpcache.bas_pair2ket; - int ish = bas_pair2bra[bas_ij]; - int jsh = bas_pair2ket[bas_ij]; - int ksh = bas_pair2bra[bas_kl]; - int nprim_ij = envs.nprim_ij; - int nprim_kl = envs.nprim_kl; - int prim_ij = offsets.primitive_ij + task_ij * nprim_ij; - int prim_kl = offsets.primitive_kl + task_kl * nprim_kl; - double* __restrict__ a12 = c_bpcache.a12; - double* __restrict__ e12 = c_bpcache.e12; - double* __restrict__ x12 = c_bpcache.x12; - double* __restrict__ y12 = c_bpcache.y12; - double* __restrict__ z12 = c_bpcache.z12; - double* __restrict__ a1 = c_bpcache.a1; - int ij, kl; - int prim_ij0, prim_ij1, prim_kl0, prim_kl1; - int nbas = c_bpcache.nbas; - double* __restrict__ bas_x = c_bpcache.bas_coords; - double* __restrict__ bas_y = bas_x + nbas; - double* __restrict__ bas_z = bas_y + nbas; - - double gout0 = 0; - double gout1 = 0; - double gout2 = 0; - double xi = bas_x[ish]; - double yi = bas_y[ish]; - double zi = bas_z[ish]; - prim_ij0 = prim_ij; - prim_ij1 = prim_ij + nprim_ij; - prim_kl0 = prim_kl; - prim_kl1 = prim_kl + nprim_kl; - for (ij = prim_ij0; ij < prim_ij1; ++ij) { - for (kl = prim_kl0; kl < prim_kl1; ++kl) { - double ai2 = -2.0*a1[ij]; - double aij = a12[ij]; - double eij = e12[ij]; - double xij = x12[ij]; - double yij = y12[ij]; - double zij = z12[ij]; - double akl = a12[kl]; - double ekl = e12[kl]; - double xkl = x12[kl]; - double ykl = y12[kl]; - double zkl = z12[kl]; - double xijxkl = xij - xkl; - double yijykl = yij - ykl; - double zijzkl = zij - zkl; - double aijkl = aij + akl; - double a1 = aij * akl; - double a0 = a1 / aijkl; - double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; - a0 *= theta; - double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl); - double fac = eij * ekl * sqrt(a0 / (a1 * a1 * a1)); - double root0, weight0; - if (x < 3.e-7) { - root0 = 0.5; - weight0 = 1.; - } else { - double tt = sqrt(x); - double fmt0 = SQRTPIE4 / tt * erf(tt); - weight0 = fmt0; - double e = exp(-x); - double b = .5 / x; - double fmt1 = b * (fmt0 - e); - root0 = fmt1 / (fmt0 - fmt1); - } - root0 /= root0 + 1 - root0 * theta; - double u2 = a0 * root0; - double tmp2 = akl * u2 / (u2 * aijkl + a1);; - double c00x = xij - xi - tmp2 * xijxkl; - double c00y = yij - yi - tmp2 * yijykl; - double c00z = zij - zi - tmp2 * zijzkl; - double g_0 = 1; - double g_1 = c00x; - double g_2 = 1; - double g_3 = c00y; - double g_4 = norm * fac * weight0; - double g_5 = g_4 * c00z; - - double f_1 = ai2 * g_1; - double f_3 = ai2 * g_3; - double f_5 = ai2 * g_5; - - gout0 += f_1 * g_2 * g_4; - gout1 += g_0 * f_3 * g_4; - gout2 += g_0 * g_2 * f_5; - } } - int *ao_loc = c_bpcache.ao_loc; - int i0 = ao_loc[ish] - jk.ao_offsets_i; - int j0 = ao_loc[jsh] - jk.ao_offsets_j; - int k0 = ao_loc[ksh] - jk.ao_offsets_k; - - int nao = jk.nao; - double* __restrict__ dm = jk.dm; - double* __restrict__ rhok = jk.rhok; - double* __restrict__ rhoj = jk.rhoj; - double* __restrict__ vj = jk.vj; - double* __restrict__ vk = jk.vk; - - int tx = threadIdx.x; - int ty = threadIdx.y; - __shared__ double sdata[THREADSX][THREADSY]; - if (!active){ - gout0 = 0.0; gout1 = 0.0; gout2 = 0.0; - } - if (vj != NULL){ - double rhoj_tmp; - int off_dm = i0 + nao*j0; - rhoj_tmp = dm[off_dm] * rhoj[k0]; - double vj_tmp[3]; - vj_tmp[0] = gout0*rhoj_tmp; - vj_tmp[1] = gout1*rhoj_tmp; - vj_tmp[2] = gout2*rhoj_tmp; - for (int j = 0; j < 3; j++){ - sdata[tx][ty] = vj_tmp[j]; __syncthreads(); - if(ty<8) sdata[tx][ty] += sdata[tx][ty+8]; __syncthreads(); - if(ty<4) sdata[tx][ty] += sdata[tx][ty+4]; __syncthreads(); - if(ty<2) sdata[tx][ty] += sdata[tx][ty+2]; __syncthreads(); - if(ty<1) sdata[tx][ty] += sdata[tx][ty+1]; __syncthreads(); - if (ty == 0) atomicAdd(vj+i0+j*nao, sdata[tx][0]); - } - } - if (vk != NULL){ - double rhok_tmp; - int off_rhok = i0 + nao*j0 + k0*nao*nao; - rhok_tmp = rhok[off_rhok]; - double vk_tmp[3]; - vk_tmp[0] = gout0 * rhok_tmp; - vk_tmp[1] = gout1 * rhok_tmp; - vk_tmp[2] = gout2 * rhok_tmp; - for (int j = 0; j < 3; j++){ - sdata[tx][ty] = vk_tmp[j]; __syncthreads(); - if(ty<8) sdata[tx][ty] += sdata[tx][ty+8]; __syncthreads(); - if(ty<4) sdata[tx][ty] += sdata[tx][ty+4]; __syncthreads(); - if(ty<2) sdata[tx][ty] += sdata[tx][ty+2]; __syncthreads(); - if(ty<1) sdata[tx][ty] += sdata[tx][ty+1]; __syncthreads(); - if (ty == 0) atomicAdd(vk+i0+j*nao, sdata[tx][0]); - } - } -} - - -__global__ -static void GINTrun_int3c2e_ip2_jk_kernel0010(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets) -{ - int ntasks_ij = offsets.ntasks_ij; - int ntasks_kl = offsets.ntasks_kl; - int task_ij = blockIdx.x * blockDim.x + threadIdx.x; - int task_kl = blockIdx.y * blockDim.y + threadIdx.y; - bool active = true; - if (task_ij >= ntasks_ij || task_kl >= ntasks_kl) { - active = false; - task_ij = 0; - task_kl = 0; - } - int bas_ij = offsets.bas_ij + task_ij; - int bas_kl = offsets.bas_kl + task_kl; - double norm = envs.fac; - double omega = envs.omega; - int *bas_pair2bra = c_bpcache.bas_pair2bra; - int *bas_pair2ket = c_bpcache.bas_pair2ket; - int ish = bas_pair2bra[bas_ij]; - int jsh = bas_pair2ket[bas_ij]; - int ksh = bas_pair2bra[bas_kl]; - int nprim_ij = envs.nprim_ij; - int nprim_kl = envs.nprim_kl; - int prim_ij = offsets.primitive_ij + task_ij * nprim_ij; - int prim_kl = offsets.primitive_kl + task_kl * nprim_kl; - double* __restrict__ a12 = c_bpcache.a12; - double* __restrict__ e12 = c_bpcache.e12; - double* __restrict__ x12 = c_bpcache.x12; - double* __restrict__ y12 = c_bpcache.y12; - double* __restrict__ z12 = c_bpcache.z12; - double* __restrict__ a1 = c_bpcache.a1; - int ij, kl; - int prim_ij0, prim_ij1, prim_kl0, prim_kl1; - int nbas = c_bpcache.nbas; - double* __restrict__ bas_x = c_bpcache.bas_coords; - double* __restrict__ bas_y = bas_x + nbas; - double* __restrict__ bas_z = bas_y + nbas; - - double gout0 = 0; - double gout1 = 0; - double gout2 = 0; - double xk = bas_x[ksh]; - double yk = bas_y[ksh]; - double zk = bas_z[ksh]; - prim_ij0 = prim_ij; - prim_ij1 = prim_ij + nprim_ij; - prim_kl0 = prim_kl; - prim_kl1 = prim_kl + nprim_kl; - for (ij = prim_ij0; ij < prim_ij1; ++ij) { - for (kl = prim_kl0; kl < prim_kl1; ++kl) { - double ak2 = -2.0*a1[kl]; - double aij = a12[ij]; - double eij = e12[ij]; - double xij = x12[ij]; - double yij = y12[ij]; - double zij = z12[ij]; - double akl = a12[kl]; - double ekl = e12[kl]; - double xkl = x12[kl]; - double ykl = y12[kl]; - double zkl = z12[kl]; - double xijxkl = xij - xkl; - double yijykl = yij - ykl; - double zijzkl = zij - zkl; - double aijkl = aij + akl; - double a1 = aij * akl; - double a0 = a1 / aijkl; - double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; - a0 *= theta; - double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl); - double fac = norm * eij * ekl * sqrt(a0 / (a1 * a1 * a1)); - double root0, weight0; - if (x < 3.e-7) { - root0 = 0.5; - weight0 = 1.; - } else { - double tt = sqrt(x); - double fmt0 = SQRTPIE4 / tt * erf(tt); - weight0 = fmt0; - double e = exp(-x); - double b = .5 / x; - double fmt1 = b * (fmt0 - e); - root0 = fmt1 / (fmt0 - fmt1); - } - root0 /= root0 + 1 - root0 * theta; - double u2 = a0 * root0; - double tmp4 = .5 / (u2 * aijkl + a1); - double b00 = u2 * tmp4; - double tmp1 = 2 * b00; - double tmp3 = tmp1 * aij; - double c0px = xkl - xk + tmp3 * xijxkl; - double c0py = ykl - yk + tmp3 * yijykl; - double c0pz = zkl - zk + tmp3 * zijzkl; - double g_0 = 1; - double g_1 = c0px; - double g_2 = 1; - double g_3 = c0py; - double g_4 = weight0 * fac; - double g_5 = c0pz * g_4; - - double f_1 = ak2 * g_1; - double f_3 = ak2 * g_3; - double f_5 = ak2 * g_5; - - gout0 += f_1 * g_2 * g_4; - gout1 += g_0 * f_3 * g_4; - gout2 += g_0 * g_2 * f_5; - - } } - - int *ao_loc = c_bpcache.ao_loc; - int i0 = ao_loc[ish] - jk.ao_offsets_i; - int j0 = ao_loc[jsh] - jk.ao_offsets_j; - int k0 = ao_loc[ksh] - jk.ao_offsets_k; - - int nao = jk.nao; - int naux = jk.naux; - double* __restrict__ dm = jk.dm; - double* __restrict__ rhok = jk.rhok; - double* __restrict__ rhoj = jk.rhoj; - double* __restrict__ vj = jk.vj; - double* __restrict__ vk = jk.vk; - - int tx = threadIdx.x; - int ty = threadIdx.y; - __shared__ double sdata[THREADSX][THREADSY]; - if (!active){ - gout0 = 0.0; gout1 = 0.0; gout2 = 0.0; - } - if (vj != NULL){ - double rhoj_tmp; - int off_dm = i0 + nao*j0; - rhoj_tmp = dm[off_dm] * rhoj[k0]; - double vj_tmp[3]; - vj_tmp[0] = gout0 * rhoj_tmp; - vj_tmp[1] = gout1 * rhoj_tmp; - vj_tmp[2] = gout2 * rhoj_tmp; - for (int j = 0; j < 3; j++){ - sdata[tx][ty] = vj_tmp[j]; __syncthreads(); - if(tx<8) sdata[tx][ty] += sdata[tx+8][ty]; __syncthreads(); - if(tx<4) sdata[tx][ty] += sdata[tx+4][ty]; __syncthreads(); - if(tx<2) sdata[tx][ty] += sdata[tx+2][ty]; __syncthreads(); - if(tx<1) sdata[tx][ty] += sdata[tx+1][ty]; __syncthreads(); - if (tx == 0) atomicAdd(vj+k0+j*naux, sdata[0][ty]); - } - } - - if (vk != NULL){ - double rhok_tmp; - int off_rhok = i0 + nao*j0 + k0*nao*nao; - rhok_tmp = rhok[off_rhok]; - double vk_tmp[3]; - vk_tmp[0] = gout0 * rhok_tmp; - vk_tmp[1] = gout1 * rhok_tmp; - vk_tmp[2] = gout2 * rhok_tmp; - for (int j = 0; j < 3; j++){ - sdata[tx][ty] = vk_tmp[j]; __syncthreads(); - if(tx<8) sdata[tx][ty] += sdata[tx+8][ty]; __syncthreads(); - if(tx<4) sdata[tx][ty] += sdata[tx+4][ty]; __syncthreads(); - if(tx<2) sdata[tx][ty] += sdata[tx+2][ty]; __syncthreads(); - if(tx<1) sdata[tx][ty] += sdata[tx+1][ty]; __syncthreads(); - if (tx == 0) atomicAdd(vk+k0+j*naux, sdata[0][ty]); - } - } -} diff --git a/gpu4pyscf/lib/gvhf/g3c2e_ip1.cu b/gpu4pyscf/lib/gvhf/g3c2e_ip1.cu index cd20ac66..3c0c88fc 100644 --- a/gpu4pyscf/lib/gvhf/g3c2e_ip1.cu +++ b/gpu4pyscf/lib/gvhf/g3c2e_ip1.cu @@ -13,7 +13,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ - + template __global__ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets) @@ -47,7 +47,7 @@ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o double uw[NROOTS*2]; double g[2*GSIZE]; double *f = g + GSIZE; - + double* __restrict__ a12 = c_bpcache.a12; double* __restrict__ x12 = c_bpcache.x12; double* __restrict__ y12 = c_bpcache.y12; @@ -69,7 +69,7 @@ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o as_ksh = lsh; as_lsh = ksh; } - + double j3[GPU_CART_MAX * 3]; double k3[GPU_CART_MAX * 3]; for (int k = 0; k < GPU_CART_MAX * 3; k++){ @@ -79,7 +79,7 @@ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o if (active) { for (ij = prim_ij; ij < prim_ij+nprim_ij; ++ij) { for (kl = prim_kl; kl < prim_kl+nprim_kl; ++kl) { - + double aij = a12[ij]; double xij = x12[ij]; double yij = y12[ij]; @@ -94,19 +94,180 @@ void GINTint3c2e_ip1_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o double aijkl = aij + akl; double a1 = aij * akl; double a0 = a1 / aijkl; - double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; + double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; a0 *= theta; double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl); GINTrys_root(x, uw); GINTscale_u(uw, theta); GINTg0_2e_2d4d(envs, g, uw, norm, as_ish, as_jsh, as_ksh, as_lsh, ij, kl); - + double ai2 = -2.0*exp[ij]; GINTnabla1i_2e(envs, f, g, ai2, envs.i_l, envs.j_l, envs.k_l); GINTkernel_int3c2e_ip1_getjk_direct(envs, jk, j3, k3, f, g, ish, jsh, ksh); - } + } } } - + write_int3c2e_ip1_jk(jk, j3, k3, ish); } + +__global__ +static void GINTrun_int3c2e_ip1_jk_kernel1000(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets) +{ + int ntasks_ij = offsets.ntasks_ij; + int ntasks_kl = offsets.ntasks_kl; + int task_ij = blockIdx.x * blockDim.x + threadIdx.x; + int task_kl = blockIdx.y * blockDim.y + threadIdx.y; + bool active = true; + if (task_ij >= ntasks_ij || task_kl >= ntasks_kl) { + active = false; + task_ij = 0; + task_kl = 0; + } + int bas_ij = offsets.bas_ij + task_ij; + int bas_kl = offsets.bas_kl + task_kl; + double norm = envs.fac; + double omega = envs.omega; + int *bas_pair2bra = c_bpcache.bas_pair2bra; + int *bas_pair2ket = c_bpcache.bas_pair2ket; + int ish = bas_pair2bra[bas_ij]; + int jsh = bas_pair2ket[bas_ij]; + int ksh = bas_pair2bra[bas_kl]; + int nprim_ij = envs.nprim_ij; + int nprim_kl = envs.nprim_kl; + int prim_ij = offsets.primitive_ij + task_ij * nprim_ij; + int prim_kl = offsets.primitive_kl + task_kl * nprim_kl; + double* __restrict__ a12 = c_bpcache.a12; + double* __restrict__ e12 = c_bpcache.e12; + double* __restrict__ x12 = c_bpcache.x12; + double* __restrict__ y12 = c_bpcache.y12; + double* __restrict__ z12 = c_bpcache.z12; + double* __restrict__ a1 = c_bpcache.a1; + int ij, kl; + int prim_ij0, prim_ij1, prim_kl0, prim_kl1; + int nbas = c_bpcache.nbas; + double* __restrict__ bas_x = c_bpcache.bas_coords; + double* __restrict__ bas_y = bas_x + nbas; + double* __restrict__ bas_z = bas_y + nbas; + + double gout0 = 0; + double gout1 = 0; + double gout2 = 0; + double xi = bas_x[ish]; + double yi = bas_y[ish]; + double zi = bas_z[ish]; + prim_ij0 = prim_ij; + prim_ij1 = prim_ij + nprim_ij; + prim_kl0 = prim_kl; + prim_kl1 = prim_kl + nprim_kl; + for (ij = prim_ij0; ij < prim_ij1; ++ij) { + for (kl = prim_kl0; kl < prim_kl1; ++kl) { + double ai2 = -2.0*a1[ij]; + double aij = a12[ij]; + double eij = e12[ij]; + double xij = x12[ij]; + double yij = y12[ij]; + double zij = z12[ij]; + double akl = a12[kl]; + double ekl = e12[kl]; + double xkl = x12[kl]; + double ykl = y12[kl]; + double zkl = z12[kl]; + double xijxkl = xij - xkl; + double yijykl = yij - ykl; + double zijzkl = zij - zkl; + double aijkl = aij + akl; + double a1 = aij * akl; + double a0 = a1 / aijkl; + double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; + a0 *= theta; + double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl); + double fac = eij * ekl * sqrt(a0 / (a1 * a1 * a1)); + double root0, weight0; + if (x < 3.e-7) { + root0 = 0.5; + weight0 = 1.; + } else { + double tt = sqrt(x); + double fmt0 = SQRTPIE4 / tt * erf(tt); + weight0 = fmt0; + double e = exp(-x); + double b = .5 / x; + double fmt1 = b * (fmt0 - e); + root0 = fmt1 / (fmt0 - fmt1); + } + root0 /= root0 + 1 - root0 * theta; + double u2 = a0 * root0; + double tmp2 = akl * u2 / (u2 * aijkl + a1);; + double c00x = xij - xi - tmp2 * xijxkl; + double c00y = yij - yi - tmp2 * yijykl; + double c00z = zij - zi - tmp2 * zijzkl; + double g_0 = 1; + double g_1 = c00x; + double g_2 = 1; + double g_3 = c00y; + double g_4 = norm * fac * weight0; + double g_5 = g_4 * c00z; + + double f_1 = ai2 * g_1; + double f_3 = ai2 * g_3; + double f_5 = ai2 * g_5; + + gout0 += f_1 * g_2 * g_4; + gout1 += g_0 * f_3 * g_4; + gout2 += g_0 * g_2 * f_5; + } } + + int *ao_loc = c_bpcache.ao_loc; + int i0 = ao_loc[ish] - jk.ao_offsets_i; + int j0 = ao_loc[jsh] - jk.ao_offsets_j; + int k0 = ao_loc[ksh] - jk.ao_offsets_k; + + int nao = jk.nao; + double* __restrict__ dm = jk.dm; + double* __restrict__ rhok = jk.rhok; + double* __restrict__ rhoj = jk.rhoj; + double* __restrict__ vj = jk.vj; + double* __restrict__ vk = jk.vk; + + int tx = threadIdx.x; + int ty = threadIdx.y; + __shared__ double sdata[THREADSX][THREADSY]; + if (!active){ + gout0 = 0.0; gout1 = 0.0; gout2 = 0.0; + } + if (vj != NULL){ + double rhoj_tmp; + int off_dm = i0 + nao*j0; + rhoj_tmp = dm[off_dm] * rhoj[k0]; + double vj_tmp[3]; + vj_tmp[0] = gout0*rhoj_tmp; + vj_tmp[1] = gout1*rhoj_tmp; + vj_tmp[2] = gout2*rhoj_tmp; + for (int j = 0; j < 3; j++){ + sdata[tx][ty] = vj_tmp[j]; __syncthreads(); + if(ty<8) sdata[tx][ty] += sdata[tx][ty+8]; __syncthreads(); + if(ty<4) sdata[tx][ty] += sdata[tx][ty+4]; __syncthreads(); + if(ty<2) sdata[tx][ty] += sdata[tx][ty+2]; __syncthreads(); + if(ty<1) sdata[tx][ty] += sdata[tx][ty+1]; __syncthreads(); + if (ty == 0) atomicAdd(vj+i0+j*nao, sdata[tx][0]); + } + } + if (vk != NULL){ + double rhok_tmp; + int off_rhok = i0 + nao*j0 + k0*nao*nao; + rhok_tmp = rhok[off_rhok]; + double vk_tmp[3]; + vk_tmp[0] = gout0 * rhok_tmp; + vk_tmp[1] = gout1 * rhok_tmp; + vk_tmp[2] = gout2 * rhok_tmp; + for (int j = 0; j < 3; j++){ + sdata[tx][ty] = vk_tmp[j]; __syncthreads(); + if(ty<8) sdata[tx][ty] += sdata[tx][ty+8]; __syncthreads(); + if(ty<4) sdata[tx][ty] += sdata[tx][ty+4]; __syncthreads(); + if(ty<2) sdata[tx][ty] += sdata[tx][ty+2]; __syncthreads(); + if(ty<1) sdata[tx][ty] += sdata[tx][ty+1]; __syncthreads(); + if (ty == 0) atomicAdd(vk+i0+j*nao, sdata[tx][0]); + } + } +} diff --git a/gpu4pyscf/lib/gvhf/g3c2e_ip2.cu b/gpu4pyscf/lib/gvhf/g3c2e_ip2.cu index e2a4cdbc..d83f3608 100644 --- a/gpu4pyscf/lib/gvhf/g3c2e_ip2.cu +++ b/gpu4pyscf/lib/gvhf/g3c2e_ip2.cu @@ -13,7 +13,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ - + template __global__ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets) @@ -47,7 +47,7 @@ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o double uw[NROOTS*2]; double g[2*GSIZE]; double *f = g + GSIZE; - + double* __restrict__ a12 = c_bpcache.a12; double* __restrict__ x12 = c_bpcache.x12; double* __restrict__ y12 = c_bpcache.y12; @@ -69,7 +69,7 @@ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o as_ksh = lsh; as_lsh = ksh; } - + double j3[GPU_CART_MAX * 3]; double k3[GPU_CART_MAX * 3]; for (int k = 0; k < GPU_CART_MAX * 3; k++){ @@ -79,7 +79,7 @@ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o if (active) { for (ij = prim_ij; ij < prim_ij+nprim_ij; ++ij) { for (kl = prim_kl; kl < prim_kl+nprim_kl; ++kl) { - + double aij = a12[ij]; double xij = x12[ij]; double yij = y12[ij]; @@ -94,19 +94,187 @@ void GINTint3c2e_ip2_jk_kernel(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets o double aijkl = aij + akl; double a1 = aij * akl; double a0 = a1 / aijkl; - double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; + double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; a0 *= theta; double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl); GINTrys_root(x, uw); GINTscale_u(uw, theta); GINTg0_2e_2d4d(envs, g, uw, norm, as_ish, as_jsh, as_ksh, as_lsh, ij, kl); - + double ak2 = -2.0*exp[kl]; GINTnabla1k_2e(envs, f, g, ak2, envs.i_l, envs.j_l, envs.k_l); GINTkernel_int3c2e_ip2_getjk_direct(envs, jk, j3, k3, f, g, ish, jsh, ksh); - } + } } } - + write_int3c2e_ip2_jk(jk, j3, k3, ksh); } + + +__global__ +static void GINTrun_int3c2e_ip2_jk_kernel0010(GINTEnvVars envs, JKMatrix jk, BasisProdOffsets offsets) +{ + int ntasks_ij = offsets.ntasks_ij; + int ntasks_kl = offsets.ntasks_kl; + int task_ij = blockIdx.x * blockDim.x + threadIdx.x; + int task_kl = blockIdx.y * blockDim.y + threadIdx.y; + bool active = true; + if (task_ij >= ntasks_ij || task_kl >= ntasks_kl) { + active = false; + task_ij = 0; + task_kl = 0; + } + int bas_ij = offsets.bas_ij + task_ij; + int bas_kl = offsets.bas_kl + task_kl; + double norm = envs.fac; + double omega = envs.omega; + int *bas_pair2bra = c_bpcache.bas_pair2bra; + int *bas_pair2ket = c_bpcache.bas_pair2ket; + int ish = bas_pair2bra[bas_ij]; + int jsh = bas_pair2ket[bas_ij]; + int ksh = bas_pair2bra[bas_kl]; + int nprim_ij = envs.nprim_ij; + int nprim_kl = envs.nprim_kl; + int prim_ij = offsets.primitive_ij + task_ij * nprim_ij; + int prim_kl = offsets.primitive_kl + task_kl * nprim_kl; + double* __restrict__ a12 = c_bpcache.a12; + double* __restrict__ e12 = c_bpcache.e12; + double* __restrict__ x12 = c_bpcache.x12; + double* __restrict__ y12 = c_bpcache.y12; + double* __restrict__ z12 = c_bpcache.z12; + double* __restrict__ a1 = c_bpcache.a1; + int ij, kl; + int prim_ij0, prim_ij1, prim_kl0, prim_kl1; + int nbas = c_bpcache.nbas; + double* __restrict__ bas_x = c_bpcache.bas_coords; + double* __restrict__ bas_y = bas_x + nbas; + double* __restrict__ bas_z = bas_y + nbas; + + double gout0 = 0; + double gout1 = 0; + double gout2 = 0; + double xk = bas_x[ksh]; + double yk = bas_y[ksh]; + double zk = bas_z[ksh]; + prim_ij0 = prim_ij; + prim_ij1 = prim_ij + nprim_ij; + prim_kl0 = prim_kl; + prim_kl1 = prim_kl + nprim_kl; + for (ij = prim_ij0; ij < prim_ij1; ++ij) { + for (kl = prim_kl0; kl < prim_kl1; ++kl) { + double ak2 = -2.0*a1[kl]; + double aij = a12[ij]; + double eij = e12[ij]; + double xij = x12[ij]; + double yij = y12[ij]; + double zij = z12[ij]; + double akl = a12[kl]; + double ekl = e12[kl]; + double xkl = x12[kl]; + double ykl = y12[kl]; + double zkl = z12[kl]; + double xijxkl = xij - xkl; + double yijykl = yij - ykl; + double zijzkl = zij - zkl; + double aijkl = aij + akl; + double a1 = aij * akl; + double a0 = a1 / aijkl; + double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; + a0 *= theta; + double x = a0 * (xijxkl * xijxkl + yijykl * yijykl + zijzkl * zijzkl); + double fac = norm * eij * ekl * sqrt(a0 / (a1 * a1 * a1)); + double root0, weight0; + if (x < 3.e-7) { + root0 = 0.5; + weight0 = 1.; + } else { + double tt = sqrt(x); + double fmt0 = SQRTPIE4 / tt * erf(tt); + weight0 = fmt0; + double e = exp(-x); + double b = .5 / x; + double fmt1 = b * (fmt0 - e); + root0 = fmt1 / (fmt0 - fmt1); + } + root0 /= root0 + 1 - root0 * theta; + double u2 = a0 * root0; + double tmp4 = .5 / (u2 * aijkl + a1); + double b00 = u2 * tmp4; + double tmp1 = 2 * b00; + double tmp3 = tmp1 * aij; + double c0px = xkl - xk + tmp3 * xijxkl; + double c0py = ykl - yk + tmp3 * yijykl; + double c0pz = zkl - zk + tmp3 * zijzkl; + double g_0 = 1; + double g_1 = c0px; + double g_2 = 1; + double g_3 = c0py; + double g_4 = weight0 * fac; + double g_5 = c0pz * g_4; + + double f_1 = ak2 * g_1; + double f_3 = ak2 * g_3; + double f_5 = ak2 * g_5; + + gout0 += f_1 * g_2 * g_4; + gout1 += g_0 * f_3 * g_4; + gout2 += g_0 * g_2 * f_5; + + } } + + int *ao_loc = c_bpcache.ao_loc; + int i0 = ao_loc[ish] - jk.ao_offsets_i; + int j0 = ao_loc[jsh] - jk.ao_offsets_j; + int k0 = ao_loc[ksh] - jk.ao_offsets_k; + + int nao = jk.nao; + int naux = jk.naux; + double* __restrict__ dm = jk.dm; + double* __restrict__ rhok = jk.rhok; + double* __restrict__ rhoj = jk.rhoj; + double* __restrict__ vj = jk.vj; + double* __restrict__ vk = jk.vk; + + int tx = threadIdx.x; + int ty = threadIdx.y; + __shared__ double sdata[THREADSX][THREADSY]; + if (!active){ + gout0 = 0.0; gout1 = 0.0; gout2 = 0.0; + } + if (vj != NULL){ + double rhoj_tmp; + int off_dm = i0 + nao*j0; + rhoj_tmp = dm[off_dm] * rhoj[k0]; + double vj_tmp[3]; + vj_tmp[0] = gout0 * rhoj_tmp; + vj_tmp[1] = gout1 * rhoj_tmp; + vj_tmp[2] = gout2 * rhoj_tmp; + for (int j = 0; j < 3; j++){ + sdata[tx][ty] = vj_tmp[j]; __syncthreads(); + if(tx<8) sdata[tx][ty] += sdata[tx+8][ty]; __syncthreads(); + if(tx<4) sdata[tx][ty] += sdata[tx+4][ty]; __syncthreads(); + if(tx<2) sdata[tx][ty] += sdata[tx+2][ty]; __syncthreads(); + if(tx<1) sdata[tx][ty] += sdata[tx+1][ty]; __syncthreads(); + if (tx == 0) atomicAdd(vj+k0+j*naux, sdata[0][ty]); + } + } + + if (vk != NULL){ + double rhok_tmp; + int off_rhok = i0 + nao*j0 + k0*nao*nao; + rhok_tmp = rhok[off_rhok]; + double vk_tmp[3]; + vk_tmp[0] = gout0 * rhok_tmp; + vk_tmp[1] = gout1 * rhok_tmp; + vk_tmp[2] = gout2 * rhok_tmp; + for (int j = 0; j < 3; j++){ + sdata[tx][ty] = vk_tmp[j]; __syncthreads(); + if(tx<8) sdata[tx][ty] += sdata[tx+8][ty]; __syncthreads(); + if(tx<4) sdata[tx][ty] += sdata[tx+4][ty]; __syncthreads(); + if(tx<2) sdata[tx][ty] += sdata[tx+2][ty]; __syncthreads(); + if(tx<1) sdata[tx][ty] += sdata[tx+1][ty]; __syncthreads(); + if (tx == 0) atomicAdd(vk+k0+j*naux, sdata[0][ty]); + } + } +} \ No newline at end of file diff --git a/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip1.cu b/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip1.cu index 9174520d..5b7faf14 100644 --- a/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip1.cu +++ b/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip1.cu @@ -68,17 +68,17 @@ static int GINTrun_tasks_int3c2e_ip1_jk(JKMatrix *jk, BasisProdOffsets *offsets, extern "C" { __host__ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache, - double *vj, double *vk, double *dm, double *rhoj, double *rhok, + double *vj, double *vk, double *dm, double *rhoj, double *rhok, int *ao_offsets, int nao, int naux, int n_dm, int *bins_locs_ij, int ntasks_kl, int ncp_ij, int cp_kl_id, double omega) { ContractionProdType *cp_kl = bpcache->cptype + cp_kl_id; - + int ng[4] = {1,0,0,0}; - + // move bpcache to constant memory checkCudaErrors(cudaMemcpyToSymbol(c_bpcache, bpcache, sizeof(BasisProdCache))); - + JKMatrix jk; jk.n_dm = n_dm; jk.nao = nao; @@ -92,8 +92,7 @@ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache, jk.ao_offsets_j = ao_offsets[1]; jk.ao_offsets_k = ao_offsets[2]; jk.ao_offsets_l = ao_offsets[3]; - BasisProdOffsets offsets; - + int *bas_pairs_locs = bpcache->bas_pairs_locs; int *primitive_pairs_locs = bpcache->primitive_pairs_locs; @@ -101,9 +100,9 @@ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache, for (int n = 0; n < MAX_STREAMS; n++){ checkCudaErrors(cudaStreamCreate(&streams[n])); } - + int *idx = (int *)malloc(sizeof(int) * TOT_NF * 3); - int *l_locs = (int *)malloc(sizeof(int) * (GPU_LMAX + 2)); + int *l_locs = (int *)malloc(sizeof(int) * (GPU_LMAX + 2)); GINTinit_index1d_xyz(idx, l_locs); checkCudaErrors(cudaMemcpyToSymbol(c_idx, idx, sizeof(int) * TOT_NF*3)); checkCudaErrors(cudaMemcpyToSymbol(c_l_locs, l_locs, sizeof(int) * (GPU_LMAX + 2))); @@ -122,7 +121,8 @@ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache, int ntasks_ij = bins_locs_ij[cp_ij_id+1] - bins_locs_ij[cp_ij_id]; if (ntasks_ij <= 0) continue; - + + BasisProdOffsets offsets; offsets.ntasks_ij = ntasks_ij; offsets.ntasks_kl = ntasks_kl; offsets.bas_ij = bas_pairs_locs[cp_ij_id]; @@ -140,7 +140,7 @@ int GINTbuild_int3c2e_ip1_jk(BasisProdCache *bpcache, checkCudaErrors(cudaStreamSynchronize(streams[n])); checkCudaErrors(cudaStreamDestroy(streams[n])); } - + return 0; } diff --git a/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip2.cu b/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip2.cu index 7adefe75..cfc45bfa 100644 --- a/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip2.cu +++ b/gpu4pyscf/lib/gvhf/nr_jk_driver_int3c2e_ip2.cu @@ -67,17 +67,17 @@ static int GINTrun_tasks_int3c2e_ip2_jk(JKMatrix *jk, BasisProdOffsets *offsets, extern "C" { __host__ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache, - double *vj, double *vk, double *dm, double *rhoj, double *rhok, + double *vj, double *vk, double *dm, double *rhoj, double *rhok, int *ao_offsets, int nao, int naux, int n_dm, int *bins_locs_ij, int ntasks_kl, int ncp_ij, int cp_kl_id, double omega) { ContractionProdType *cp_kl = bpcache->cptype + cp_kl_id; - + int ng[4] = {0,0,1,0}; - + // move bpcache to constant memory checkCudaErrors(cudaMemcpyToSymbol(c_bpcache, bpcache, sizeof(BasisProdCache))); - + JKMatrix jk; jk.n_dm = n_dm; jk.nao = nao; @@ -91,18 +91,17 @@ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache, jk.ao_offsets_j = ao_offsets[1]; jk.ao_offsets_k = ao_offsets[2]; jk.ao_offsets_l = ao_offsets[3]; - BasisProdOffsets offsets; - + int *bas_pairs_locs = bpcache->bas_pairs_locs; int *primitive_pairs_locs = bpcache->primitive_pairs_locs; - + cudaStream_t streams[MAX_STREAMS]; for (int n = 0; n < MAX_STREAMS; n++){ checkCudaErrors(cudaStreamCreate(&streams[n])); } - + int *idx = (int *)malloc(sizeof(int) * TOT_NF * 3); - int *l_locs = (int *)malloc(sizeof(int) * (GPU_LMAX + 2)); + int *l_locs = (int *)malloc(sizeof(int) * (GPU_LMAX + 2)); GINTinit_index1d_xyz(idx, l_locs); checkCudaErrors(cudaMemcpyToSymbol(c_idx, idx, sizeof(int) * TOT_NF*3)); checkCudaErrors(cudaMemcpyToSymbol(c_l_locs, l_locs, sizeof(int) * (GPU_LMAX + 2))); @@ -111,7 +110,7 @@ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache, for (int cp_ij_id = 0; cp_ij_id < ncp_ij; cp_ij_id++){ int n_stream = cp_ij_id % MAX_STREAMS; - + GINTEnvVars envs; ContractionProdType *cp_ij = bpcache->cptype + cp_ij_id; GINTinit_EnvVars(&envs, cp_ij, cp_kl, ng); @@ -123,6 +122,7 @@ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache, int ntasks_ij = bins_locs_ij[cp_ij_id+1] - bins_locs_ij[cp_ij_id]; if (ntasks_ij <= 0) continue; + BasisProdOffsets offsets; offsets.ntasks_ij = ntasks_ij; offsets.ntasks_kl = ntasks_kl; offsets.bas_ij = bas_pairs_locs[cp_ij_id]; @@ -131,7 +131,7 @@ int GINTbuild_int3c2e_ip2_jk(BasisProdCache *bpcache, offsets.primitive_kl = primitive_pairs_locs[cp_kl_id]; int err = GINTrun_tasks_int3c2e_ip2_jk(&jk, &offsets, &envs, streams[n_stream]); - + if (err != 0) { return err; } diff --git a/gpu4pyscf/lib/tests/test_cupy_helper.py b/gpu4pyscf/lib/tests/test_cupy_helper.py index 2b085674..987c7165 100644 --- a/gpu4pyscf/lib/tests/test_cupy_helper.py +++ b/gpu4pyscf/lib/tests/test_cupy_helper.py @@ -72,8 +72,7 @@ def test_sparse(self): def test_dist_matrix(self): a = cupy.random.rand(4, 3) rij = cupy.sum((a[:,None,:] - a[None,:,:])**2, axis=2)**0.5 - - rij0 = dist_matrix(a) + rij0 = dist_matrix(a, a) assert cupy.linalg.norm(rij - rij0) < 1e-10 def test_takebak(self): diff --git a/gpu4pyscf/lib/tests/test_cutensor.py b/gpu4pyscf/lib/tests/test_cutensor.py index 7cbd08d5..ca338331 100644 --- a/gpu4pyscf/lib/tests/test_cutensor.py +++ b/gpu4pyscf/lib/tests/test_cutensor.py @@ -16,7 +16,7 @@ import unittest import numpy import cupy -from gpu4pyscf.lib.cupy_helper import * +from gpu4pyscf.lib.cupy_helper import contract class KnownValues(unittest.TestCase): def test_contract(self): @@ -49,7 +49,7 @@ def test_cache(self): c = contract('ijkl,jl->ik', a, b) c_einsum = cupy.einsum('ijkl,jl->ik', a, b) assert cupy.linalg.norm(c - c_einsum) < 1e-10 - + if __name__ == "__main__": print("Full tests for cutensor module") unittest.main() \ No newline at end of file diff --git a/gpu4pyscf/solvent/grad/pcm.py b/gpu4pyscf/solvent/grad/pcm.py index cf6cda8f..a2149c4d 100644 --- a/gpu4pyscf/solvent/grad/pcm.py +++ b/gpu4pyscf/solvent/grad/pcm.py @@ -24,9 +24,10 @@ from pyscf import lib from pyscf import gto, df from pyscf.grad import rhf as rhf_grad +from pyscf.solvent import ddcosmo_grad + from gpu4pyscf.solvent.pcm import PI, switch_h from gpu4pyscf.df import int3c2e - from gpu4pyscf.lib.cupy_helper import contract from gpu4pyscf.lib import logger @@ -193,7 +194,6 @@ def grad_qv(pcmobj, dm): intopt.build(1e-14, diag_block_with_triu=True, aosym=False) coeff = intopt.coeff dm_cart = coeff @ dm @ coeff.T - #dm_cart = cupy.einsum('pi,ij,qj->pq', coeff, dm, coeff) dvj, _ = int3c2e.get_int3c2e_ip_jk(intopt, 0, 'ip1', q_sym, None, dm_cart) dq, _ = int3c2e.get_int3c2e_ip_jk(intopt, 0, 'ip2', q_sym, None, dm_cart) @@ -232,10 +232,10 @@ def grad_solver(pcmobj, dm): dF, dA = get_dF_dA(pcmobj.surface) - with_D = pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE'] + with_D = pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE', 'SMD'] dD, dS, dSii = get_dD_dS(pcmobj.surface, dF, with_D=with_D, with_S=True) - if pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']: + if pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE', 'SMD']: DA = D*A epsilon = pcmobj.eps @@ -253,7 +253,7 @@ def grad_solver(pcmobj, dm): de -= cupy.asarray([cupy.sum(de_dS[p0:p1], axis=0) for p0,p1, in gridslice]) de -= 0.5*contract('i,xij->jx', vK_1*q, dSii) # 0.5*cupy.einsum('i,xij,i->jx', vK_1, dSii, q) - elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']: + elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE', 'SMD']: dD = dD.transpose([2,0,1]) dS = dS.transpose([2,0,1]) dSii = dSii.transpose([2,0,1]) @@ -315,41 +315,51 @@ def contract_ket(a, B, c): return de.get() def make_grad_object(grad_method): - ''' - return solvent gradient object - ''' - grad_method_class = grad_method.__class__ - class WithSolventGrad(grad_method_class): - def __init__(self, grad_method): - self.__dict__.update(grad_method.__dict__) - self.de_solvent = None - self.de_solute = None - self._keys = self._keys.union(['de_solvent', 'de_solute']) - - def kernel(self, *args, dm=None, atmlst=None, **kwargs): - dm = kwargs.pop('dm', None) - if dm is None: - dm = self.base.make_rdm1(ao_repr=True) - - self.de_solvent = grad_qv(self.base.with_solvent, dm) - self.de_solvent+= grad_solver(self.base.with_solvent, dm) - self.de_solvent+= grad_nuc(self.base.with_solvent, dm) - - self.de_solute = grad_method_class.kernel(self, *args, **kwargs) - self.de = self.de_solute + self.de_solvent - - if self.verbose >= logger.NOTE: - logger.note(self, '--------------- %s (+%s) gradients ---------------', - self.base.__class__.__name__, - self.base.with_solvent.__class__.__name__) - rhf_grad._write(self, self.mol, self.de, self.atmlst) - logger.note(self, '----------------------------------------------') - return self.de - - def _finalize(self): - # disable _finalize. It is called in grad_method.kernel method - # where self.de was not yet initialized. - pass - - return WithSolventGrad(grad_method) - + '''For grad_method in vacuum, add nuclear gradients of solvent pcmobj''' + if grad_method.base.with_solvent.frozen: + raise RuntimeError('Frozen solvent model is not avialbe for energy gradients') + + name = (grad_method.base.with_solvent.__class__.__name__ + + grad_method.__class__.__name__) + return lib.set_class(WithSolventGrad(grad_method), + (WithSolventGrad, grad_method.__class__), name) + +class WithSolventGrad: + _keys = {'de_solvent', 'de_solute'} + + def __init__(self, grad_method): + self.__dict__.update(grad_method.__dict__) + self.de_solvent = None + self.de_solute = None + + def undo_solvent(self): + cls = self.__class__ + name_mixin = self.base.with_solvent.__class__.__name__ + obj = lib.view(self, lib.drop_class(cls, WithSolventGrad, name_mixin)) + del obj.de_solvent + del obj.de_solute + return obj + + def kernel(self, *args, dm=None, atmlst=None, **kwargs): + dm = kwargs.pop('dm', None) + if dm is None: + dm = self.base.make_rdm1(ao_repr=True) + + self.de_solute = super().kernel(*args, **kwargs) + self.de_solvent = grad_qv(self.base.with_solvent, dm) + self.de_solvent+= grad_solver(self.base.with_solvent, dm) + self.de_solvent+= grad_nuc(self.base.with_solvent, dm) + self.de = self.de_solute + self.de_solvent + + if self.verbose >= logger.NOTE: + logger.note(self, '--------------- %s (+%s) gradients ---------------', + self.base.__class__.__name__, + self.base.with_solvent.__class__.__name__) + rhf_grad._write(self, self.mol, self.de, self.atmlst) + logger.note(self, '----------------------------------------------') + return self.de + + def _finalize(self): + # disable _finalize. It is called in grad_method.kernel method + # where self.de was not yet initialized. + pass \ No newline at end of file diff --git a/gpu4pyscf/solvent/grad/smd.py b/gpu4pyscf/solvent/grad/smd.py new file mode 100644 index 00000000..60e25a22 --- /dev/null +++ b/gpu4pyscf/solvent/grad/smd.py @@ -0,0 +1,279 @@ +# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +''' +Gradient of PCM family solvent model +''' +# pylint: disable=C0103 + +import numpy as np +import cupy +#from cupyx import scipy, jit +from pyscf import lib +from pyscf import gto, df +from pyscf.grad import rhf as rhf_grad +from pyscf.data import radii +from pyscf.solvent import ddcosmo_grad + +from gpu4pyscf.solvent import pcm, smd +from gpu4pyscf.solvent.grad import pcm as pcm_grad +from gpu4pyscf.solvent.smd import ( + sigma_water, sigma_n, sigma_alpha, sigma_beta, r_zz, swtich_function, + hartree2kcal) +from gpu4pyscf.df import int3c2e +from gpu4pyscf.lib.cupy_helper import contract +from gpu4pyscf.lib import logger + +def grad_swtich_function(R, r, dr): + if R < r + dr: + return -np.exp(dr/(R-dr-r)) * dr / (R-dr-r)**2 + else: + return 0.0 + +def atomic_surface_tension(symbols, coords, n, alpha, beta, water=True): + ''' + - list of atomic symbols + - atomic coordinates in Anstrong + - solvent descriptors: n, alpha, beta + ''' + + def get_bond_tension(bond): + if water: + return sigma_water.get(bond, 0.0) + t = 0.0 + t += sigma_n.get(bond, 0.0) * n + t += sigma_alpha.get(bond, 0.0) * alpha + t += sigma_beta.get(bond, 0.0) * beta + return t + + def get_atom_tension(sym_i): + if water: + return sigma_water.get(sym_i, 0.0) + t = 0.0 + t += sigma_n.get(sym_i, 0.0) * n + t += sigma_alpha.get(sym_i, 0.0) * alpha + t += sigma_beta.get(sym_i, 0.0) * beta + return t + natm = coords.shape[0] + ri_rj = coords[:,None,:] - coords[None,:,:] + rij = np.sum(ri_rj**2, axis=2)**0.5 + np.fill_diagonal(rij, 1) + drij = ri_rj / np.expand_dims(rij, axis=-1) + tensions = [] + for i, sym_i in enumerate(symbols): + if sym_i not in ['H', 'C', 'N', 'O', 'F', 'Si', 'S', 'Cl', 'Br']: + tensions.append(np.zeros([natm,3])) + continue + + tension = np.zeros([natm,3]) + if sym_i in ['F', 'Si', 'S', 'Cl', 'Br']: + tensions.append(tension) + continue + + if sym_i == 'H': + dt_HC = np.zeros([natm,3]) + dt_HO = np.zeros([natm,3]) + for j, sym_j in enumerate(symbols): + if sym_j == 'C': + r, dr = r_zz.get(('H','C'), (0.0, 0.0)) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_HC[i] += dt_drij + dt_HC[j] -= dt_drij + if sym_j == 'O': + r, dr = r_zz.get(('H','O'), (0.0, 0.0)) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_HO[i] += dt_drij + dt_HO[j] -= dt_drij + sig_HC = get_bond_tension(('H','C')) + sig_HO = get_bond_tension(('H','O')) + tension += sig_HC * dt_HC + sig_HO * dt_HO + tensions.append(tension) + continue + + if sym_i == 'C': + dt_CC = np.zeros([natm,3]) + dt_CN = np.zeros([natm,3]) + t_CN = 0.0 + for j, sym_j in enumerate(symbols): + if sym_j == 'C' and i != j: + r, dr = r_zz.get(('C', 'C'), (0.0, 0.0)) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_CC[i] += dt_drij + dt_CC[j] -= dt_drij + if sym_j == 'N': + r, dr = r_zz.get(('C', 'N'), (0.0, 0.0)) + t_CN += swtich_function(rij[i,j], r, dr) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_CN[i] += dt_drij + dt_CN[j] -= dt_drij + sig_CC = get_bond_tension(('C','C')) + sig_CN = get_bond_tension(('C','N')) + tension += sig_CC * dt_CC + sig_CN * (2 * t_CN * dt_CN) + tensions.append(tension) + continue + + if sym_i == 'N': + t_NC = 0.0 + dt_NC = np.zeros([natm,3]) + dt_NC3 = np.zeros([natm,3]) + for j, sym_j in enumerate(symbols): + if sym_j == 'C': + r, dr = r_zz.get(('N','C'), (0.0, 0.0)) + tk = 0.0 + dtk = np.zeros([natm,3]) + for k, sym_k in enumerate(symbols): + if k != i and k != j: + rjk, drjk = r_zz.get(('C', sym_k), (0.0, 0.0)) + tk += swtich_function(rij[j,k], rjk, drjk) + dtk_rjk = grad_swtich_function(rij[j,k], rjk, drjk) * drij[j,k] + dtk[j] += dtk_rjk + dtk[k] -= dtk_rjk + + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] * tk**2 + dt_NC[i] += dt_drij + dt_NC[j] -= dt_drij + + t = swtich_function(rij[i,j], r, dr) + dt_NC += t * (2 * tk * dtk) + t_NC += t * tk**2 + + r, dr = r_zz.get(('N','C3'), (0.0, 0.0)) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_NC3[i] += dt_drij + dt_NC3[j] -= dt_drij + sig_NC = get_bond_tension(('N','C')) + sig_NC3= get_bond_tension(('N','C3')) + tension += sig_NC * (1.3 * t_NC**0.3 * dt_NC) + sig_NC3 * dt_NC3 + tensions.append(tension) + continue + + if sym_i == 'O': + dt_OC = np.zeros([natm,3]) + dt_ON = np.zeros([natm,3]) + dt_OO = np.zeros([natm,3]) + dt_OP = np.zeros([natm,3]) + for j, sym_j in enumerate(symbols): + if sym_j == 'C': + r, dr = r_zz.get(('O','C'), (0.0, 0.0)) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_OC[i] += dt_drij + dt_OC[j] -= dt_drij + if sym_j == 'N': + r, dr = r_zz.get(('O','N'), (0.0, 0.0)) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_ON[i] += dt_drij + dt_ON[j] -= dt_drij + if sym_j == 'O' and j != i: + r, dr = r_zz.get(('O','O'), (0.0, 0.0)) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_OO[i] += dt_drij + dt_OO[j] -= dt_drij + if sym_j == 'P': + r, dr = r_zz.get(('O','P'), (0.0, 0.0)) + dt_drij = grad_swtich_function(rij[i,j], r, dr) * drij[i,j] + dt_OP[i] += dt_drij + dt_OP[j] -= dt_drij + sig_OC = get_bond_tension(('O','C')) + sig_ON = get_bond_tension(('O','N')) + sig_OO = get_bond_tension(('O','O')) + sig_OP = get_bond_tension(('O','P')) + tension += sig_OC * dt_OC + sig_ON * dt_ON + sig_OO * dt_OO + sig_OP * dt_OP + tensions.append(tension) + continue + + return cupy.asarray(tensions) + +def get_cds(smdobj): + mol = smdobj.mol + n, _, alpha, beta, gamma, _, phi, psi = smdobj.solvent_descriptors + symbols = [mol.atom_symbol(ia) for ia in range(mol.natm)] + coords = mol.atom_coords(unit='A') + if smdobj._solvent.lower() != 'water': + grad_tension = atomic_surface_tension(symbols, coords, n, alpha, beta, water=False) + atm_tension = smd.atomic_surface_tension(symbols, coords, n, alpha, beta, water=False) + mol_tension = smd.molecular_surface_tension(beta, gamma, phi, psi) + else: + grad_tension = atomic_surface_tension(symbols, coords, n, alpha, beta, water=True) + atm_tension = smd.atomic_surface_tension(symbols, coords, n, alpha, beta, water=True) + mol_tension = 0.0 + + # generate surface for calculating SASA + rad = radii.VDW + 0.4/radii.BOHR + surface = pcm.gen_surface(mol, ng=smdobj.sasa_ng, rad=rad) + _, grad_area = pcm_grad.get_dF_dA(surface) + area = surface['area'] + gridslice = surface['gslice_by_atom'] + SASA = cupy.asarray([cupy.sum(area[p0:p1], axis=0) for p0,p1, in gridslice]).get() + grad_SASA = cupy.asarray([cupy.sum(grad_area[p0:p1], axis=0) for p0,p1, in gridslice]).get() + SASA *= radii.BOHR**2 + grad_SASA *= radii.BOHR**2 + mol_cds = mol_tension * np.sum(grad_SASA, axis=0) / 1000 + grad_tension *= radii.BOHR + atm_cds = np.einsum('i,ijx->jx', SASA, grad_tension.get()) / 1000 + atm_cds+= np.einsum('i,ijx->jx', atm_tension.get(), grad_SASA) / 1000 + return (mol_cds + atm_cds)/hartree2kcal # hartree + +def make_grad_object(grad_method): + '''For grad_method in vacuum, add nuclear gradients of solvent pcmobj''' + if grad_method.base.with_solvent.frozen: + raise RuntimeError('Frozen solvent model is not avialbe for energy gradients') + + name = (grad_method.base.with_solvent.__class__.__name__ + + grad_method.__class__.__name__) + return lib.set_class(WithSolventGrad(grad_method), + (WithSolventGrad, grad_method.__class__), name) + +class WithSolventGrad: + _keys = {'de_solvent', 'de_solute'} + + def __init__(self, grad_method): + self.__dict__.update(grad_method.__dict__) + self.de_solvent = None + self.de_solute = None + + def undo_solvent(self): + cls = self.__class__ + name_mixin = self.base.with_solvent.__class__.__name__ + obj = lib.view(self, lib.drop_class(cls, WithSolventGrad, name_mixin)) + del obj.de_solvent + del obj.de_solute + return obj + + def kernel(self, *args, dm=None, atmlst=None, **kwargs): + dm = kwargs.pop('dm', None) + if dm is None: + dm = self.base.make_rdm1(ao_repr=True) + + self.de_solute = super().kernel(*args, **kwargs) + self.de_solvent = pcm_grad.grad_qv(self.base.with_solvent, dm) + self.de_solvent+= pcm_grad.grad_solver(self.base.with_solvent, dm) + self.de_solvent+= pcm_grad.grad_nuc(self.base.with_solvent, dm) + + self.de = self.de_solute + self.de_solvent + self.de += get_cds(self.base.with_solvent) + if self.verbose >= logger.NOTE: + logger.note(self, '--------------- %s (+%s) gradients ---------------', + self.base.__class__.__name__, + self.base.with_solvent.__class__.__name__) + rhf_grad._write(self, self.mol, self.de, self.atmlst) + logger.note(self, '----------------------------------------------') + return self.de + + def _finalize(self): + # disable _finalize. It is called in grad_method.kernel method + # where self.de was not yet initialized. + pass + + diff --git a/gpu4pyscf/solvent/hessian/pcm.py b/gpu4pyscf/solvent/hessian/pcm.py index ae28942c..4c5c4ba7 100644 --- a/gpu4pyscf/solvent/hessian/pcm.py +++ b/gpu4pyscf/solvent/hessian/pcm.py @@ -131,7 +131,11 @@ def pcm_grad_scanner(mol): pcmobj.reset(mol) e, v = pcmobj._get_vind(dm) #return grad_elec(pcmobj, dm) - return grad_nuc(pcmobj, dm) + grad_solver(pcmobj, dm) + grad_qv(pcmobj, dm) + pcm_grad = grad_nuc(pcmobj, dm) + pcm_grad+= grad_solver(pcmobj, dm) + pcm_grad+= grad_qv(pcmobj, dm) + return pcm_grad + mol.verbose = 0 de = numpy.zeros([mol.natm, mol.natm, 3, 3]) eps = 1e-3 @@ -224,44 +228,49 @@ def analytic_grad_vmat(pcmobj, mo_coeff, mo_occ, atmlst=None, verbose=None): pcmobj.reset(pmol) return vmat """ + def make_hess_object(hess_method): - ''' - return solvent hessian object - ''' - hess_method_class = hess_method.__class__ - class WithSolventHess(hess_method_class): - def __init__(self, hess_method): - self.__dict__.update(hess_method.__dict__) - self.de_solvent = None - self.de_solute = None - self._keys = self._keys.union(['de_solvent', 'de_solute']) - - def kernel(self, *args, dm=None, atmlst=None, **kwargs): - dm = kwargs.pop('dm', None) - if dm is None: - dm = self.base.make_rdm1(ao_repr=True) - is_equilibrium = self.base.with_solvent.equilibrium_solvation - self.base.with_solvent.equilibrium_solvation = True - self.de_solvent = hess_elec(self.base.with_solvent, dm, verbose=self.verbose) - #self.de_solvent+= hess_nuc(self.base.with_solvent) - self.de_solute = hess_method_class.kernel(self, *args, **kwargs) - self.de = self.de_solute + self.de_solvent - self.base.with_solvent.equilibrium_solvation = is_equilibrium - return self.de - - def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): - if atmlst is None: - atmlst = range(self.mol.natm) - h1ao = hess_method_class.make_h1(self, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) - dv = fd_grad_vmat(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) - for i0, ia in enumerate(atmlst): - h1ao[i0] += dv[i0] - return h1ao - - def _finalize(self): - # disable _finalize. It is called in grad_method.kernel method - # where self.de was not yet initialized. - pass - - return WithSolventHess(hess_method) + if hess_method.base.with_solvent.frozen: + raise RuntimeError('Frozen solvent model is not avialbe for energy hessian') + + name = (hess_method.base.with_solvent.__class__.__name__ + + hess_method.__class__.__name__) + return lib.set_class(WithSolventHess(hess_method), + (WithSolventHess, hess_method.__class__), name) + +class WithSolventHess: + _keys = {'de_solvent', 'de_solute'} + + def __init__(self, hess_method): + self.__dict__.update(hess_method.__dict__) + self.de_solvent = None + self.de_solute = None + + def kernel(self, *args, dm=None, atmlst=None, **kwargs): + dm = kwargs.pop('dm', None) + if dm is None: + dm = self.base.make_rdm1(ao_repr=True) + is_equilibrium = self.base.with_solvent.equilibrium_solvation + self.base.with_solvent.equilibrium_solvation = True + self.de_solvent = hess_elec(self.base.with_solvent, dm, verbose=self.verbose) + #self.de_solvent+= hess_nuc(self.base.with_solvent) + self.de_solute = super().kernel(*args, **kwargs) + self.de = self.de_solute + self.de_solvent + self.base.with_solvent.equilibrium_solvation = is_equilibrium + return self.de + + def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): + if atmlst is None: + atmlst = range(self.mol.natm) + h1ao = super().make_h1(mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) + dv = fd_grad_vmat(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) + for i0, ia in enumerate(atmlst): + h1ao[i0] += dv[i0] + return h1ao + + def _finalize(self): + # disable _finalize. It is called in grad_method.kernel method + # where self.de was not yet initialized. + pass + diff --git a/gpu4pyscf/solvent/hessian/smd.py b/gpu4pyscf/solvent/hessian/smd.py new file mode 100644 index 00000000..af41595b --- /dev/null +++ b/gpu4pyscf/solvent/hessian/smd.py @@ -0,0 +1,303 @@ +# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +''' +Gradient of PCM family solvent model +''' +# pylint: disable=C0103 + +import numpy as np +import cupy +#from cupyx import scipy, jit +from pyscf import lib +from pyscf import gto, df +from pyscf.grad import rhf as rhf_grad +from pyscf.data import radii +from pyscf.solvent import ddcosmo_grad + +from gpu4pyscf.solvent import pcm, smd +from gpu4pyscf.solvent.grad import pcm as pcm_grad +from gpu4pyscf.solvent.grad import smd as smd_grad +from gpu4pyscf.solvent.hessian import pcm as pcm_hess + +from gpu4pyscf.solvent.smd import ( + sigma_water, sigma_n, sigma_alpha, sigma_beta, r_zz, swtich_function, + hartree2kcal) +from gpu4pyscf.df import int3c2e +from gpu4pyscf.lib.cupy_helper import contract +from gpu4pyscf.lib import logger + +def hess_swtich_function(R, r, dr): + if R < r + dr: + dist = (R[0]**2 + R[1]**2 + R[2]**2)**.5 + hess = [ + [R[1]**2+R[2]**2, -R[0]*R[1], -R[0]*R[2]], + [-R[0]*R[1], R[0]**2+R[2]**2, -R[1]*R[2]], + [-R[0]*R[2], -R[1]*R[2], R[0]**2+R[1]**2]] + return np.asarray(hess)/dist + else: + return np.zeros([3,3]) + +def atomic_surface_tension(symbols, coords, n, alpha, beta, water=True): + ''' + TODO: debug later + - list of atomic symbols + - atomic coordinates in Anstrong + - solvent descriptors: n, alpha, beta + ''' + + def get_bond_tension(bond): + if water: + return sigma_water.get(bond, 0.0) + t = 0.0 + t += sigma_n.get(bond, 0.0) * n + t += sigma_alpha.get(bond, 0.0) * alpha + t += sigma_beta.get(bond, 0.0) * beta + return t + + def get_atom_tension(sym_i): + if water: + return sigma_water.get(sym_i, 0.0) + t = 0.0 + t += sigma_n.get(sym_i, 0.0) * n + t += sigma_alpha.get(sym_i, 0.0) * alpha + t += sigma_beta.get(sym_i, 0.0) * beta + return t + natm = coords.shape[0] + ri_rj = coords[:,None,:] - coords[None,:,:] + rij = np.sum(ri_rj**2, axis=2)**0.5 + np.fill_diagonal(rij, 1) + #drij = ri_rj / np.expand_dims(rij, axis=-1) + tensions = [] + for i, sym_i in enumerate(symbols): + if sym_i not in ['H', 'C', 'N', 'O', 'F', 'Si', 'S', 'Cl', 'Br']: + tensions.append(np.zeros([natm,3])) + continue + + tension = np.zeros([natm,3]) + if sym_i in ['F', 'Si', 'S', 'Cl', 'Br']: + tensions.append(tension) + continue + + if sym_i == 'H': + dt_HC = np.zeros([natm,natm,3,3]) + dt_HO = np.zeros([natm,natm,3,3]) + for j, sym_j in enumerate(symbols): + if sym_j == 'C': + r, dr = r_zz.get(('H','C'), (0.0, 0.0)) + dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + dt_HC[i,i] += dt_drij + dt_HC[i,j] -= dt_drij + dt_HC[j,i] -= dt_drij + dt_HC[j,j] += dt_drij + if sym_j == 'O': + r, dr = r_zz.get(('H','O'), (0.0, 0.0)) + dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + dt_HO[i,i] += dt_drij + dt_HO[i,j] -= dt_drij + dt_HO[j,i] -= dt_drij + dt_HO[j,j] += dt_drij + sig_HC = get_bond_tension(('H','C')) + sig_HO = get_bond_tension(('H','O')) + tension += sig_HC * dt_HC + sig_HO * dt_HO + tensions.append(tension) + + if sym_i == 'C': + dt_CN = np.zeros([natm,3]) + d2t_CC = np.zeros([natm,natm,3,3]) + d2t_CN = np.zeros([natm,natm,3,3]) + t_CN = 0.0 + for j, sym_j in enumerate(symbols): + if sym_j == 'C' and i != j: + r, dr = r_zz.get(('C', 'C'), (0.0, 0.0)) + d2t_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + d2t_CC[i,i] += d2t_drij + d2t_CC[i,j] -= d2t_drij + d2t_CC[j,i] -= d2t_drij + d2t_CC[j,j] += d2t_drij + if sym_j == 'N': + r, dr = r_zz.get(('C', 'N'), (0.0, 0.0)) + t_CN += swtich_function(rij[i,j], r, dr) + dt_drij = smd_grad.grad_switch_function(coords[i]-coords[j], r, dr) + dt_CN[i] += dt_drij + dt_CN[j] -= dt_drij + d2t_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + d2t_CN[i,i] += d2t_drij + d2t_CN[i,j] -= d2t_drij + d2t_CN[j,i] -= d2t_drij + d2t_CN[j,j] += d2t_drij + sig_CC = get_bond_tension(('C','C')) + sig_CN = get_bond_tension(('C','N')) + tension += sig_CC * d2t_CC + sig_CN * (2 * t_CN * d2t_CN + 2 * np.einsum('i,j->ij', dt_drij[i], dt_drij[j])) + tensions.append(tension) + + if sym_i == 'N': + t_NC = 0.0 + dt_NC = np.zeros([natm,natm,3,3]) + dt_NC3 = np.zeros([natm,natm,3,3]) + for j, sym_j in enumerate(symbols): + if sym_j == 'C': + r, dr = r_zz.get(('N','C'), (0.0, 0.0)) + tk = 0.0 + dtk = np.zeros([natm,natm,3,3]) + for k, sym_k in enumerate(symbols): + if k != i and k != j: + rjk, drjk = r_zz.get(('C', sym_k), (0.0, 0.0)) + tk += swtich_function(rij[j,k], rjk, drjk) + dtk_rjk = hess_swtich_function(coords[j]-coords[k], rjk, drjk) + dtk[j,j] += dtk_rjk + dtk[j,k] -= dtk_rjk + dtk[k,j] -= dtk_rjk + dtk[k,k] += dtk_rjk + dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) * tk**2 + dt_NC[i,i] += dt_drij + dt_NC[i,j] -= dt_drij + dt_NC[j,i] -= dt_drij + dt_NC[j,j] += dt_drij + t = swtich_function(coords[i]-coords[j], r, dr) + dt_NC += t * (2 * tk * dtk) + t_NC += t * tk**2 + + r, dr = r_zz.get(('N','C3'), (0.0, 0.0)) + dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + dt_NC3[i,i] += dt_drij + dt_NC3[i,j] -= dt_drij + dt_NC3[j,i] -= dt_drij + dt_NC3[j,j] += dt_drij + sig_NC = get_bond_tension(('N','C')) + sig_NC3= get_bond_tension(('N','C3')) + tension += sig_NC * (1.3 * t_NC**0.3 * dt_NC) + sig_NC3 * dt_NC3 + tensions.append(tension) + + if sym_i == 'O': + dt_OC = np.zeros([natm,natm,3,3]) + dt_ON = np.zeros([natm,natm,3,3]) + dt_OO = np.zeros([natm,natm,3,3]) + dt_OP = np.zeros([natm,natm,3,3]) + for j, sym_j in enumerate(symbols): + if sym_j == 'C': + r, dr = r_zz.get(('O','C'), (0.0, 0.0)) + dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + dt_OC[i,i] += dt_drij + dt_OC[i,j] -= dt_drij + dt_OC[j,i] -= dt_drij + dt_OC[j,j] += dt_drij + if sym_j == 'N': + r, dr = r_zz.get(('O','N'), (0.0, 0.0)) + dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + dt_ON[i,i] += dt_drij + dt_ON[i,j] -= dt_drij + dt_ON[j,i] -= dt_drij + dt_ON[j,j] += dt_drij + if sym_j == 'O' and j != i: + r, dr = r_zz.get(('O','O'), (0.0, 0.0)) + dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + dt_OO[i,i] += dt_drij + dt_OO[i,j] -= dt_drij + dt_OO[j,i] -= dt_drij + dt_OO[j,j] += dt_drij + if sym_j == 'P': + r, dr = r_zz.get(('O','P'), (0.0, 0.0)) + dt_drij = hess_swtich_function(coords[i]-coords[j], r, dr) + dt_OP[i,i] += dt_drij + dt_OP[i,j] -= dt_drij + dt_OP[j,i] -= dt_drij + dt_OP[j,j] += dt_drij + sig_OC = get_bond_tension(('O','C')) + sig_ON = get_bond_tension(('O','N')) + sig_OO = get_bond_tension(('O','O')) + sig_OP = get_bond_tension(('O','P')) + tension += sig_OC * dt_OC + sig_ON * dt_ON + sig_OO * dt_OO + sig_OP * dt_OP + tensions.append(tension) + return cupy.asarray(tensions) + +def get_cds(smdobj): + mol = smdobj.mol + solvent = smdobj.solvent + def smd_grad_scanner(mol): + smdobj_tmp = smd.SMD(mol) + smdobj_tmp.solvent = solvent + return smd_grad.get_cds(smdobj_tmp) + + log = logger.new_logger(mol, mol.verbose) + t1 = log.init_timer() + + eps = 1e-4 + natm = mol.natm + hess_cds = np.zeros([natm,natm,3,3]) + for ia in range(mol.natm): + for j in range(3): + coords = mol.atom_coords(unit='B') + coords[ia,j] += eps + mol.set_geom_(coords, unit='B') + mol.build() + grad0_cds = smd_grad_scanner(mol) + + coords[ia,j] -= 2.0*eps + mol.set_geom_(coords, unit='B') + mol.build() + grad1_cds = smd_grad_scanner(mol) + + coords[ia,j] += eps + mol.set_geom_(coords, unit='B') + hess_cds[ia,:,j] = (grad0_cds - grad1_cds) / (2.0 * eps) + t1 = log.timer_debug1('solvent energy', *t1) + return hess_cds # hartree + +def make_hess_object(hess_method): + '''For hess_method in vacuum, add nuclear Hessian of solvent smdobj''' + if hess_method.base.with_solvent.frozen: + raise RuntimeError('Frozen solvent model is not avialbe for energy hessian') + + name = (hess_method.base.with_solvent.__class__.__name__ + + hess_method.__class__.__name__) + return lib.set_class(WithSolventHess(hess_method), + (WithSolventHess, hess_method.__class__), name) + +class WithSolventHess: + _keys = {'de_solvent', 'de_solute'} + + def __init__(self, hess_method): + self.__dict__.update(hess_method.__dict__) + self.de_solvent = None + self.de_solute = None + + def kernel(self, *args, dm=None, atmlst=None, **kwargs): + dm = kwargs.pop('dm', None) + if dm is None: + dm = self.base.make_rdm1(ao_repr=True) + is_equilibrium = self.base.with_solvent.equilibrium_solvation + self.base.with_solvent.equilibrium_solvation = True + self.de_solvent = pcm_hess.hess_elec(self.base.with_solvent, dm, verbose=self.verbose) + #self.de_solvent+= hess_nuc(self.base.with_solvent) + self.de_solute = super().kernel(*args, **kwargs) + self.de = self.de_solute + self.de_solvent + self.de += get_cds(self.base.with_solvent) + self.base.with_solvent.equilibrium_solvation = is_equilibrium + return self.de + + def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): + if atmlst is None: + atmlst = range(self.mol.natm) + h1ao = super().make_h1(mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) + dv = pcm_hess.fd_grad_vmat(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) + for i0, ia in enumerate(atmlst): + h1ao[i0] += dv[i0] + return h1ao + + def _finalize(self): + # disable _finalize. It is called in grad_method.kernel method + # where self.de was not yet initialized. + pass diff --git a/gpu4pyscf/solvent/pcm.py b/gpu4pyscf/solvent/pcm.py index 335de1f7..50e3957f 100644 --- a/gpu4pyscf/solvent/pcm.py +++ b/gpu4pyscf/solvent/pcm.py @@ -25,7 +25,7 @@ from pyscf import gto, df from pyscf.dft import gen_grid from pyscf.data import radii -from pyscf.solvent import ddcosmo +from pyscf.solvent import ddcosmo#, _attach_solvent from gpu4pyscf.solvent import _attach_solvent from gpu4pyscf.df import int3c2e from gpu4pyscf.lib import logger @@ -122,7 +122,8 @@ def gen_surface(mol, ng=302, rad=modified_Bondi, vdw_scale=1.2, r_probe=0.0): atom_grid = r_vdw * unit_sphere[:,:3] + atom_coords[ia,:] #riJ = scipy.spatial.distance.cdist(atom_grid[:,:3], atom_coords) - riJ = cupy.sum((atom_grid[:,None,:] - atom_coords[None,:,:])**2, axis=2)**0.5 + #riJ = cupy.sum((atom_grid[:,None,:] - atom_coords[None,:,:])**2, axis=2)**0.5 + riJ = dist_matrix(atom_grid, atom_coords) diJ = (riJ - R_in_J) / R_sw_J diJ[:,ia] = 1.0 diJ[diJ<1e-8] = 0.0 @@ -193,7 +194,7 @@ def get_D_S(surface, with_S=True, with_D=False): xi_ij = xi_i * xi_j / (xi_i**2 + xi_j**2)**0.5 #rij = scipy.spatial.distance.cdist(grid_coords, grid_coords) #rij = cupy.sum((grid_coords[:,None,:] - grid_coords[None,:,:])**2, axis=2)**0.5 - rij = dist_matrix(grid_coords) + rij = dist_matrix(grid_coords, grid_coords) xi_r_ij = xi_ij * rij cupy.fill_diagonal(rij, 1) S = scipy.special.erf(xi_r_ij) / rij @@ -201,8 +202,13 @@ def get_D_S(surface, with_S=True, with_D=False): D = None if with_D: - drij = cupy.expand_dims(grid_coords, axis=1) - grid_coords - nrij = cupy.sum(drij * norm_vec, axis=-1) + #drij = cupy.expand_dims(grid_coords, axis=1) - grid_coords + #nri = cupy.sum(grid_coords * norm_vec, axis=-1) + #nrij = cupy.expand_dims(nri, axis=1) - nri + #cupy.expand_dims(grid_coords, axis=1) * norm_vec + nrij = grid_coords.dot(norm_vec.T) - cupy.sum(grid_coords * norm_vec, axis=-1) + + #nrij = cupy.sum(drij * norm_vec, axis=-1) D = S*nrij/rij**2 -2.0*xi_r_ij/PI**0.5*cupy.exp(-xi_r_ij**2)*nrij/rij**3 cupy.fill_diagonal(D, -charge_exp * (2.0 / PI)**0.5 / (2.0 * R_vdw)) diff --git a/gpu4pyscf/solvent/smd.py b/gpu4pyscf/solvent/smd.py index 5fb5a8e0..1cc79f98 100644 --- a/gpu4pyscf/solvent/smd.py +++ b/gpu4pyscf/solvent/smd.py @@ -18,12 +18,14 @@ ''' import numpy as np +import scipy import cupy from pyscf import lib from pyscf.data import radii from pyscf.dft import gen_grid from gpu4pyscf.solvent import pcm, _attach_solvent from gpu4pyscf.lib import logger +from gpu4pyscf.lib.cupy_helper import dist_matrix @lib.with_doc(_attach_solvent._for_scf.__doc__) def smd_for_scf(mf, solvent_obj=None, dm=None): @@ -353,17 +355,13 @@ def get_atom_tension(sym_i): t += sigma_beta.get(sym_i, 0.0) * beta return t - rij = cupy.sum((coords[:,None,:] - coords[None,:,:])**2, axis=2)**0.5 + rij = scipy.spatial.distance.cdist(coords, coords) tensions = [] for i, sym_i in enumerate(symbols): if sym_i not in ['H', 'C', 'N', 'O', 'F', 'Si', 'S', 'Cl', 'Br']: tensions.append(0) continue - #sig_n = sigma_n.get(sym_i, 0.0) - #sig_a = sigma_alpha.get(sym_i, 0.0) - #sig_b = sigma_beta.get(sym_i, 0.0) - #tension = sig_n * n + sig_a * alpha + sig_b * beta tension = get_atom_tension(sym_i) if sym_i in ['F', 'Si', 'S', 'Cl', 'Br']: tensions.append(tension) @@ -374,50 +372,53 @@ def get_atom_tension(sym_i): t_HO = 0.0 for j, sym_j in enumerate(symbols): if sym_j == 'C': - r, dr = r_zz['H','C'] + r, dr = r_zz.get(('H','C'), (0.0, 0.0)) t_HC += swtich_function(rij[i,j], r, dr) if sym_j == 'O': - r, dr = r_zz['H','O'] + r, dr = r_zz.get(('H','O'), (0.0, 0.0)) t_HO += swtich_function(rij[i,j], r, dr) sig_HC = get_bond_tension(('H','C')) sig_HO = get_bond_tension(('H','O')) tension += sig_HC * t_HC + sig_HO * t_HO tensions.append(tension) + continue if sym_i == 'C': t_CC = 0.0 t_CN = 0.0 for j, sym_j in enumerate(symbols): if sym_j == 'C' and i != j: - r, dr = r_zz['C', 'C'] + r, dr = r_zz.get(('C', 'C'), (0.0, 0.0)) t_CC += swtich_function(rij[i,j], r, dr) if sym_j == 'N': - r, dr = r_zz['C', 'N'] + r, dr = r_zz.get(('C', 'N'), (0.0, 0.0)) t_CN += swtich_function(rij[i,j], r, dr) sig_CC = get_bond_tension(('C','C')) sig_CN = get_bond_tension(('C','N')) tension += sig_CC * t_CC + sig_CN * t_CN**2 tensions.append(tension) + continue if sym_i == 'N': t_NC = 0.0 t_NC3 = 0.0 for j, sym_j in enumerate(symbols): if sym_j == 'C': - r, dr = r_zz['N','C'] + r, dr = r_zz.get(('N','C'), (0.0,0.0)) tk = 0.0 for k, sym_k in enumerate(symbols): if k != i and k != j: - rjk, drjk = r_zz['C', sym_k] + rjk, drjk = r_zz.get(('C', sym_k), (0.0,0.0)) tk += swtich_function(rij[j,k], rjk, drjk) t_NC += swtich_function(rij[i,j], r, dr) * tk**2 - r, dr = r_zz['N','C3'] + r, dr = r_zz.get(('N','C3'), (0.0, 0.0)) t_NC3 += swtich_function(rij[i,j], r, dr) sig_NC = get_bond_tension(('N','C')) sig_NC3= get_bond_tension(('N','C3')) tension += sig_NC * t_NC**1.3 + sig_NC3 * t_NC3 tensions.append(tension) + continue if sym_i == 'O': t_OC = 0.0 @@ -426,16 +427,16 @@ def get_atom_tension(sym_i): t_OP = 0.0 for j, sym_j in enumerate(symbols): if sym_j == 'C': - r, dr = r_zz['O','C'] + r, dr = r_zz.get(('O','C'), (0.0, 0.0)) t_OC += swtich_function(rij[i,j], r, dr) if sym_j == 'N': - r, dr = r_zz['O','N'] + r, dr = r_zz.get(('O','N'), (0.0, 0.0)) t_ON += swtich_function(rij[i,j], r, dr) if sym_j == 'O' and j != i: - r, dr = r_zz['O','O'] + r, dr = r_zz.get(('O','O'), (0.0, 0.0)) t_OO += swtich_function(rij[i,j], r, dr) if sym_j == 'P': - r, dr = r_zz['O','P'] + r, dr = r_zz.get(('O','P'), (0.0, 0.0)) t_OP += swtich_function(rij[i,j], r, dr) sig_OC = get_bond_tension(('O','C')) sig_ON = get_bond_tension(('O','N')) @@ -443,6 +444,7 @@ def get_atom_tension(sym_i): sig_OP = get_bond_tension(('O','P')) tension += sig_OC * t_OC + sig_ON * t_ON + sig_OO * t_OO + sig_OP * t_OP tensions.append(tension) + continue return cupy.asarray(tensions) def molecular_surface_tension(beta, gamma, phi, psi): @@ -486,23 +488,23 @@ def get_cds(smdobj): # generate surface for calculating SASA rad = radii.VDW + 0.4/radii.BOHR - surface = pcm.gen_surface(mol, ng=302, rad=rad) + surface = pcm.gen_surface(mol, ng=smdobj.sasa_ng, rad=rad) area = surface['area'] gridslice = surface['gslice_by_atom'] SASA = cupy.asarray([cupy.sum(area[p0:p1], axis=0) for p0,p1, in gridslice]) SASA *= radii.BOHR**2 mol_cds = mol_tension * cupy.sum(SASA) / 1000 # in kcal/mol atm_cds = cupy.sum(SASA * atm_tension) / 1000 # in kcal/mol - return (mol_cds + atm_cds)/hartree2kcal # hartree class SMD(pcm.PCM): _keys = { - 'intopt', 'method', 'e_cds', 'solvent_descriptors', 'r_probe' + 'intopt', 'method', 'e_cds', 'solvent_descriptors', 'r_probe', 'sasa_ng' } def __init__(self, mol, solvent=''): super().__init__(mol) self.vdw_scale = 1.0 + self.sasa_ng = 590 # quadrature grids for calculating SASA self.r_probe = 0.4/radii.BOHR self.method = 'SMD' # use IEFPCM for electrostatic if solvent not in solvent_db: @@ -566,10 +568,24 @@ def get_cds(self): return get_cds(self) def nuc_grad_method(self, grad_method): - raise RuntimeError('SMD gradient is not implemented') + from gpu4pyscf.solvent.grad import smd as smd_grad + if self.frozen: + raise RuntimeError('Frozen solvent model is not supported') + from gpu4pyscf import scf + if isinstance(grad_method.base, scf.hf.RHF): + return smd_grad.make_grad_object(grad_method) + else: + raise RuntimeError('Only SCF gradient is supported') def Hessian(self, hess_method): - raise RuntimeError('SMD Hessian is not implemented') + from gpu4pyscf.solvent.hessian import smd as smd_hess + if self.frozen: + raise RuntimeError('Frozen solvent model is not supported') + from gpu4pyscf import scf + if isinstance(hess_method.base, scf.hf.RHF): + return smd_hess.make_hess_object(hess_method) + else: + raise RuntimeError('Only SCF gradient is supported') def reset(self, mol=None): super().reset(mol) diff --git a/gpu4pyscf/solvent/tests/test_smd.py b/gpu4pyscf/solvent/tests/test_smd.py index 45e2028a..b322f4ce 100644 --- a/gpu4pyscf/solvent/tests/test_smd.py +++ b/gpu4pyscf/solvent/tests/test_smd.py @@ -40,20 +40,23 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def test_cds_solvent(self): smdobj = smd.SMD(mol) + smdobj.sasa_ng = 590 smdobj.solvent = 'toluene' e_cds = smdobj.get_cds() - assert numpy.abs(e_cds - -0.0013476060879874362) < 1e-8 + assert numpy.abs(e_cds - -0.0013476016530476354) < 1e-8 def test_cds_water(self): smdobj = smd.SMD(mol) + smdobj.sasa_ng = 590 smdobj.solvent = 'water' e_cds = smdobj.get_cds() - assert numpy.abs(e_cds - 0.0022847142144050057) < 1e-8 + assert numpy.abs(e_cds - 0.0022903044356530726) < 1e-8 def test_smd_solvent(self): mf = scf.RHF(mol) mf = mf.SMD() mf.with_solvent.solvent = 'ethanol' + mf.with_solvent.sasa_ng = 590 e_tot = mf.kernel() assert numpy.abs(e_tot - -76.075066568) < 2e-4 @@ -61,9 +64,12 @@ def test_smd_water(self): mf = scf.RHF(mol) mf = mf.SMD() mf.with_solvent.solvent = 'water' + mf.with_solvent.sasa_ng = 590 e_tot = mf.kernel() assert numpy.abs(e_tot - -76.0756052903) < 2e-4 + # TODO: add more test for other molecules + if __name__ == "__main__": print("Full Tests for SMDs") unittest.main() \ No newline at end of file diff --git a/gpu4pyscf/solvent/tests/test_smd_grad.py b/gpu4pyscf/solvent/tests/test_smd_grad.py new file mode 100644 index 00000000..ec8173f7 --- /dev/null +++ b/gpu4pyscf/solvent/tests/test_smd_grad.py @@ -0,0 +1,87 @@ +# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import unittest +import numpy +from pyscf import gto +from gpu4pyscf import scf +from gpu4pyscf.solvent import pcm +from gpu4pyscf.solvent.grad import pcm as pcm_grad +from gpu4pyscf.solvent.grad import smd as smd_grad +from gpu4pyscf.solvent import smd + +def setUpModule(): + global mol + mol = gto.Mole() + mol.atom = '''P 0.000 0.000 0.000 +O 1.500 0.000 0.000 +O -1.500 0.000 0.000 +O 0.000 1.500 0.000 +O 0.000 -1.500 0.000 +H 1.000 1.000 0.000 +H -1.000 -1.000 0.000 +H 0.000 -2.000 0.000 +''' + mol.basis = 'sto3g' + mol.output = '/dev/null' + mol.build(verbose=0) + +def tearDownModule(): + global mol + mol.stdout.close() + del mol + +def _check_grad(mol, solvent='water'): + natm = mol.natm + fd_cds = numpy.zeros([natm,3]) + eps = 1e-4 + for ia in range(mol.natm): + for j in range(3): + coords = mol.atom_coords(unit='B') + coords[ia,j] += eps + mol.set_geom_(coords, unit='B') + mol.build() + + smdobj = smd.SMD(mol) + smdobj.solvent = solvent + e0_cds = smdobj.get_cds() + + coords[ia,j] -= 2.0*eps + mol.set_geom_(coords, unit='B') + mol.build() + + smdobj = smd.SMD(mol) + smdobj.solvent = solvent + e1_cds = smdobj.get_cds() + + coords[ia,j] += eps + mol.set_geom_(coords, unit='B') + fd_cds[ia,j] = (e0_cds - e1_cds) / (2.0 * eps) + + smdobj = smd.SMD(mol) + smdobj.solvent = solvent + grad_cds = smd_grad.get_cds(smdobj) + assert numpy.linalg.norm(fd_cds - grad_cds) < 1e-8 + +class KnownValues(unittest.TestCase): + def test_grad_water(self): + _check_grad(mol, solvent='water') + + def test_grad_solvent(self): + _check_grad(mol, solvent='ethanol') + +if __name__ == "__main__": + print("Full Tests for Gradient of SMD") + unittest.main() \ No newline at end of file diff --git a/gpu4pyscf/solvent/tests/test_smd_hessian.py b/gpu4pyscf/solvent/tests/test_smd_hessian.py new file mode 100644 index 00000000..24193663 --- /dev/null +++ b/gpu4pyscf/solvent/tests/test_smd_hessian.py @@ -0,0 +1,58 @@ +# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import unittest +import numpy +from pyscf import gto +from gpu4pyscf import scf +from gpu4pyscf.solvent.hessian import smd as smd_hess +from gpu4pyscf.solvent import smd + +def setUpModule(): + global mol + mol = gto.Mole() + mol.atom = '''P 0.000 0.000 0.000 +O 1.500 0.000 0.000 +O -1.500 0.000 0.000 +O 0.000 1.500 0.000 +O 0.000 -1.500 0.000 +H 1.000 1.000 0.000 +H -1.000 -1.000 0.000 +H 0.000 -2.000 0.000 +''' + mol.basis = 'sto3g' + mol.output = '/dev/null' + mol.build(verbose=0) + +def tearDownModule(): + global mol + mol.stdout.close() + del mol + +def _check_hess(mol, solvent='water'): + smdobj = smd.SMD(mol) + smdobj.solvent = solvent + smd_hess.get_cds(smdobj) + +class KnownValues(unittest.TestCase): + def test_hess_water(self): + _check_hess(mol, solvent='water') + + def test_hess_solvent(self): + _check_hess(mol, solvent='ethanol') + +if __name__ == "__main__": + print("Full Tests for Hessian of SMD") + unittest.main() \ No newline at end of file