Skip to content

Commit

Permalink
efficient PCM model (pyscf#150)
Browse files Browse the repository at this point in the history
* efficient PCM model

* flake8

* pcm_d_s.cu -> pcm.cu

* improve pcm cuda code

* uncomment pcm

* uncomment pcm

* fixed bug in pcm model
  • Loading branch information
wxj6000 authored May 12, 2024
1 parent 0509a00 commit c4fba3c
Show file tree
Hide file tree
Showing 7 changed files with 292 additions and 17 deletions.
1 change: 1 addition & 0 deletions gpu4pyscf/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ add_subdirectory(gint)
add_subdirectory(gvhf)
add_subdirectory(gdft)
add_subdirectory(cupy_helper)
add_subdirectory(solvent)

option(BUILD_LIBXC "Using libxc for DFT" ON)
if(BUILD_LIBXC)
Expand Down
25 changes: 25 additions & 0 deletions gpu4pyscf/lib/solvent/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# gpu4pyscf is a plugin to use Nvidia GPU in PySCF package
#
# Copyright (C) 2022 Qiming Sun
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

#set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -arch=sm_80")

add_library(solvent SHARED
pcm.cu
)

set_target_properties(solvent PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR})
set_target_properties(solvent PROPERTIES CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
166 changes: 166 additions & 0 deletions gpu4pyscf/lib/solvent/pcm.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/* Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <cuda_runtime.h>
#include <stdio.h>

#define THREADS 32
#define SQRT2_PI 0.7978845608028654
#define SQRT_PI 1.7724538509055159

// D and S matrix in J. Chem. Phys. 133, 244111 (2010)
__global__
static void _pcm_d_s(double *matrix_d, double *matrix_s,
const double *coords, const double *norm_vec, const double *r_vdw,
const double *charge_exp, const double *switch_fun,
int n)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (i >= n || j >= n){
return;
}

// calculate xi
double ei = charge_exp[i];
double ej = charge_exp[j];
double xi_ij = ei * ej / sqrt(ei*ei + ej*ej);

// calculate r
double xi = coords[3*i];
double yi = coords[3*i+1];
double zi = coords[3*i+2];
double xj = coords[3*j];
double yj = coords[3*j+1];
double zj = coords[3*j+2];
double dx = xi - xj;
double dy = yi - yj;
double dz = zi - zj;
double rij = norm3d(dx, dy, dz);

double xi_r_ij = xi_ij * rij;
if (i == j) rij = 1.0;
double s = erf(xi_r_ij) / rij;
if (i == j) s = charge_exp[i] * SQRT2_PI / switch_fun[i];
matrix_s[i*n+j] = s;

if (matrix_d != NULL){
double nxj = norm_vec[3*j];
double nyj = norm_vec[3*j+1];
double nzj = norm_vec[3*j+2];

double nrij = 0.0;
nrij += (xi - xj) * nxj;
nrij += (yi - yj) * nyj;
nrij += (zi - zj) * nzj;

double rij2 = rij*rij;
double rij3 = rij2*rij;
double xi_r2_ij = xi_r_ij * xi_r_ij;
double d = s * nrij / rij2 - 2.0*xi_r_ij/SQRT_PI*exp(-xi_r2_ij)*nrij/rij3;
if (i == j) d = -charge_exp[i] * SQRT2_PI / (2.0*r_vdw[i]);
matrix_d[i*n+j] = d;
}
}

__global__
static void _pcm_dD_dS(double *matrix_dd, double *matrix_ds,
const double *coords, const double *norm_vec, const double *r_vdw,
const double *charge_exp, const double *switch_fun,
int n)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (i >= n || j >= n){
return;
}

// calculate xi
double ei = charge_exp[i];
double ej = charge_exp[j];
double xi_ij = ei * ej / sqrt(ei*ei + ej*ej);

// calculate r
double dx = coords[3*i] - coords[3*j];
double dy = coords[3*i+1] - coords[3*j+1];
double dz = coords[3*i+2] - coords[3*j+2];
double rij = norm3d(dx, dy, dz);

double xi_r_ij = xi_ij * rij;
double xi_r2_ij = xi_r_ij * xi_r_ij;
if (i == j) rij = 1.0;
double rij2 = rij*rij;

double dS_dr = -(erf(xi_r_ij) - 2.0*xi_r_ij/ SQRT_PI * exp(-xi_r2_ij)) / rij2;
if (i == j) dS_dr = 0.0;
double dx_rij = dx / rij;
double dy_rij = dy / rij;
double dz_rij = dz / rij;

matrix_ds[3*(i*n+j)] = dS_dr * dx_rij;
matrix_ds[3*(i*n+j)+1] = dS_dr * dy_rij;
matrix_ds[3*(i*n+j)+2] = dS_dr * dz_rij;

if (matrix_dd != NULL){
double nxj = norm_vec[3*j];
double nyj = norm_vec[3*j+1];
double nzj = norm_vec[3*j+2];
double nj_rij = dx*nxj + dy*nyj + dz*nzj;
double rij3 = rij2*rij;
double dD_dri = 4.0*xi_r2_ij*xi_ij / SQRT_PI*exp(-xi_r2_ij)*nj_rij/rij3;
if (i == j) dD_dri = 0.0;

matrix_dd[3*(i*n+j)] = dD_dri*dx_rij + dS_dr*(-nxj/rij + 3.0*nj_rij/rij2*dx_rij);
matrix_dd[3*(i*n+j)+1] = dD_dri*dy_rij + dS_dr*(-nyj/rij + 3.0*nj_rij/rij2*dy_rij);
matrix_dd[3*(i*n+j)+2] = dD_dri*dz_rij + dS_dr*(-nzj/rij + 3.0*nj_rij/rij2*dz_rij);
}
}

extern "C" {
int pcm_d_s(cudaStream_t stream, double *matrix_d, double *matrix_s,
const double *coords, const double *norm_vec, const double *r_vdw,
const double *charge_exp, const double *switch_fun,
int n)
{
int ntilex = (n + THREADS - 1) / THREADS;
int ntiley = (n + THREADS - 1) / THREADS;
dim3 threads(THREADS, THREADS);
dim3 blocks(ntilex, ntiley);
_pcm_d_s<<<blocks, threads, 0, stream>>>(matrix_d, matrix_s, coords, norm_vec, r_vdw, charge_exp, switch_fun, n);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
return 1;
}
return 0;
}

int pcm_dd_ds(cudaStream_t stream, double *matrix_dD, double *matrix_dS,
const double *coords, const double *norm_vec, const double *r_vdw,
const double *charge_exp, const double *switch_fun,
int n)
{
int ntilex = (n + THREADS - 1) / THREADS;
int ntiley = (n + THREADS - 1) / THREADS;
dim3 threads(THREADS, THREADS);
dim3 blocks(ntilex, ntiley);
_pcm_dD_dS<<<blocks, threads, 0, stream>>>(matrix_dD, matrix_dS, coords, norm_vec, r_vdw, charge_exp, switch_fun, n);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
return 1;
}
return 0;
}
}
40 changes: 38 additions & 2 deletions gpu4pyscf/solvent/grad/pcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,20 @@

import numpy
import cupy
import ctypes
from cupyx import scipy
from pyscf import lib
from pyscf import gto
from pyscf.grad import rhf as rhf_grad

from gpu4pyscf.solvent.pcm import PI, switch_h
from gpu4pyscf.df import int3c2e
from gpu4pyscf.lib.cupy_helper import contract
from gpu4pyscf.lib.cupy_helper import contract, load_library
from gpu4pyscf.lib import logger
from pyscf import lib as pyscf_lib

libdft = lib.load_library('libdft')
libsolvent = load_library('libsolvent')

def grad_switch_h(x):
''' first derivative of h(x)'''
Expand Down Expand Up @@ -98,7 +101,7 @@ def get_dF_dA(surface):

return dF, dA

def get_dD_dS(surface, dF, with_S=True, with_D=False):
def get_dD_dS_slow(surface, dF, with_S=True, with_D=False):
'''
derivative of D and S w.r.t grids, partial_i D_ij = -partial_j D_ij
S is symmetric, D is not
Expand Down Expand Up @@ -141,6 +144,39 @@ def get_dD_dS(surface, dF, with_S=True, with_D=False):

return dD, dS, dSii

def get_dD_dS(surface, dF, with_S=True, with_D=False, stream=None):
charge_exp = surface['charge_exp']
grid_coords = surface['grid_coords']
switch_fun = surface['switch_fun']
norm_vec = surface['norm_vec']
R_vdw = surface['R_vdw']
n = charge_exp.shape[0]
dS = cupy.empty([n,n,3])
dD = None
dS_ptr = ctypes.cast(dS.data.ptr, ctypes.c_void_p)
dD_ptr = pyscf_lib.c_null_ptr()
if with_D:
dD = cupy.empty([n,n,3])
dD_ptr = ctypes.cast(dD.data.ptr, ctypes.c_void_p)
if stream is None:
stream = cupy.cuda.get_current_stream()
err = libsolvent.pcm_dd_ds(
ctypes.cast(stream.ptr, ctypes.c_void_p),
dD_ptr, dS_ptr,
ctypes.cast(grid_coords.data.ptr, ctypes.c_void_p),
ctypes.cast(norm_vec.data.ptr, ctypes.c_void_p),
ctypes.cast(R_vdw.data.ptr, ctypes.c_void_p),
ctypes.cast(charge_exp.data.ptr, ctypes.c_void_p),
ctypes.cast(switch_fun.data.ptr, ctypes.c_void_p),
ctypes.c_int(n)
)
if err != 0:
raise RuntimeError('Failed in generating PCM dD and dS matrices.')

dSii_dF = -charge_exp * (2.0/PI)**0.5 / switch_fun**2
dSii = cupy.expand_dims(dSii_dF, axis=(1,2)) * dF
return dD, dS, dSii

def grad_nuc(pcmobj, dm):
mol = pcmobj.mol
log = logger.new_logger(mol, mol.verbose)
Expand Down
45 changes: 33 additions & 12 deletions gpu4pyscf/solvent/pcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@
from gpu4pyscf.solvent import _attach_solvent
from gpu4pyscf.df import int3c2e
from gpu4pyscf.lib import logger
from gpu4pyscf.lib.cupy_helper import dist_matrix
from gpu4pyscf.lib.cupy_helper import dist_matrix, load_library

libdft = lib.load_library('libdft')
libsolvent = load_library('libsolvent')

@lib.with_doc(_attach_solvent._for_scf.__doc__)
def pcm_for_scf(mf, solvent_obj=None, dm=None):
Expand Down Expand Up @@ -179,7 +180,7 @@ def get_F_A(surface):
A = weights*R_vdw**2*switch_fun
return switch_fun, A

def get_D_S(surface, with_S=True, with_D=False):
def get_D_S_slow(surface, with_S=True, with_D=False):
'''
generate D and S matrix in J. Chem. Phys. 133, 244111 (2010)
The diagonal entries of S is not filled
Expand All @@ -192,8 +193,6 @@ def get_D_S(surface, with_S=True, with_D=False):

xi_i, xi_j = cupy.meshgrid(charge_exp, charge_exp, indexing='ij')
xi_ij = xi_i * xi_j / (xi_i**2 + xi_j**2)**0.5
#rij = scipy.spatial.distance.cdist(grid_coords, grid_coords)
#rij = cupy.sum((grid_coords[:,None,:] - grid_coords[None,:,:])**2, axis=2)**0.5
rij = dist_matrix(grid_coords, grid_coords)
xi_r_ij = xi_ij * rij
cupy.fill_diagonal(rij, 1)
Expand All @@ -202,17 +201,40 @@ def get_D_S(surface, with_S=True, with_D=False):

D = None
if with_D:
#drij = cupy.expand_dims(grid_coords, axis=1) - grid_coords
#nri = cupy.sum(grid_coords * norm_vec, axis=-1)
#nrij = cupy.expand_dims(nri, axis=1) - nri
#cupy.expand_dims(grid_coords, axis=1) * norm_vec
nrij = grid_coords.dot(norm_vec.T) - cupy.sum(grid_coords * norm_vec, axis=-1)

#nrij = cupy.sum(drij * norm_vec, axis=-1)

D = S*nrij/rij**2 -2.0*xi_r_ij/PI**0.5*cupy.exp(-xi_r_ij**2)*nrij/rij**3
cupy.fill_diagonal(D, -charge_exp * (2.0 / PI)**0.5 / (2.0 * R_vdw))
return D, S

def get_D_S(surface, with_S=True, with_D=False, stream=None):
''' Efficiently generating D matrix and S matrix in PCM models '''
charge_exp = surface['charge_exp']
grid_coords = surface['grid_coords']
switch_fun = surface['switch_fun']
norm_vec = surface['norm_vec']
R_vdw = surface['R_vdw']
n = charge_exp.shape[0]
S = cupy.empty([n,n])
D = None
S_ptr = ctypes.cast(S.data.ptr, ctypes.c_void_p)
D_ptr = lib.c_null_ptr()
if with_D:
D = cupy.empty([n,n])
D_ptr = ctypes.cast(D.data.ptr, ctypes.c_void_p)
if stream is None:
stream = cupy.cuda.get_current_stream()
err = libsolvent.pcm_d_s(
ctypes.cast(stream.ptr, ctypes.c_void_p),
D_ptr, S_ptr,
ctypes.cast(grid_coords.data.ptr, ctypes.c_void_p),
ctypes.cast(norm_vec.data.ptr, ctypes.c_void_p),
ctypes.cast(R_vdw.data.ptr, ctypes.c_void_p),
ctypes.cast(charge_exp.data.ptr, ctypes.c_void_p),
ctypes.cast(switch_fun.data.ptr, ctypes.c_void_p),
ctypes.c_int(n)
)
if err != 0:
raise RuntimeError('Failed in generating PCM D and S matrices.')
return D, S

class PCM(lib.StreamObject):
Expand Down Expand Up @@ -301,7 +323,6 @@ def build(self, ng=None):
atom_coords = mol.atom_coords(unit='B')
atom_charges = mol.atom_charges()

# Move this to GPU
auxmol = gto.fakemol_for_charges(grid_coords.get(), expnt=charge_exp.get()**2)
intopt = int3c2e.VHFOpt(mol, auxmol, 'int2e')
intopt.build(1e-14, diag_block_with_triu=False, aosym=True, group_size=256)
Expand Down
Loading

0 comments on commit c4fba3c

Please sign in to comment.