From cc16db17f6891e3c03cc54be44b4d61bc35abc2e Mon Sep 17 00:00:00 2001 From: Henry Wang Date: Wed, 18 Dec 2024 08:09:56 +0800 Subject: [PATCH] Replace int1e in pcm and qmmm --- gpu4pyscf/gto/int3c1e.py | 4 --- gpu4pyscf/gto/int3c1e_ip.py | 31 +++++++++++------ gpu4pyscf/gto/moleintor.py | 16 +++++++-- gpu4pyscf/lib/gint/nr_fill_ao_int3c1e.cu | 4 +-- gpu4pyscf/lib/gint/nr_fill_ao_int3c1e_ip.cu | 4 +-- gpu4pyscf/qmmm/pbc/itrf.py | 38 +++------------------ gpu4pyscf/solvent/grad/pcm.py | 27 ++++----------- gpu4pyscf/solvent/pcm.py | 21 +++++++----- gpu4pyscf/solvent/smd.py | 10 +++--- 9 files changed, 68 insertions(+), 87 deletions(-) diff --git a/gpu4pyscf/gto/int3c1e.py b/gpu4pyscf/gto/int3c1e.py index 1b294842..fe0041d8 100644 --- a/gpu4pyscf/gto/int3c1e.py +++ b/gpu4pyscf/gto/int3c1e.py @@ -224,7 +224,6 @@ def get_int3c1e(mol, grids, charge_exponents, intopt): lj = intopt.angular[cpj] stream = cp.cuda.get_current_stream() - nao_cart = intopt._sorted_mol.nao log_q_ij = intopt.log_qs[cp_ij_id] @@ -252,7 +251,6 @@ def get_int3c1e(mol, grids, charge_exponents, intopt): ctypes.cast(charge_exponents_pointer, ctypes.c_void_p), ctypes.c_int(ngrids_of_split), ctypes.cast(int3c_angular_slice.data.ptr, ctypes.c_void_p), - ctypes.c_int(nao_cart), strides.ctypes.data_as(ctypes.c_void_p), ao_offsets.ctypes.data_as(ctypes.c_void_p), bins_locs_ij.ctypes.data_as(ctypes.c_void_p), @@ -303,7 +301,6 @@ def get_int3c1e_charge_contracted(mol, grids, charge_exponents, charges, intopt) lj = intopt.angular[cpj] stream = cp.cuda.get_current_stream() - nao_cart = intopt._sorted_mol.nao log_q_ij = intopt.log_qs[cp_ij_id] @@ -336,7 +333,6 @@ def get_int3c1e_charge_contracted(mol, grids, charge_exponents, charges, intopt) ctypes.cast(charge_exponents_pointer, ctypes.c_void_p), ctypes.c_int(ngrids), ctypes.cast(int1e_angular_slice.data.ptr, ctypes.c_void_p), - ctypes.c_int(nao_cart), strides.ctypes.data_as(ctypes.c_void_p), ao_offsets.ctypes.data_as(ctypes.c_void_p), bins_locs_ij.ctypes.data_as(ctypes.c_void_p), diff --git a/gpu4pyscf/gto/int3c1e_ip.py b/gpu4pyscf/gto/int3c1e_ip.py index 3a024606..e045b97e 100644 --- a/gpu4pyscf/gto/int3c1e_ip.py +++ b/gpu4pyscf/gto/int3c1e_ip.py @@ -60,7 +60,6 @@ def get_int3c1e_ip(mol, grids, charge_exponents, intopt): lj = intopt.angular[cpj] stream = cp.cuda.get_current_stream() - nao_cart = intopt._sorted_mol.nao log_q_ij = intopt.log_qs[cp_ij_id] @@ -88,7 +87,6 @@ def get_int3c1e_ip(mol, grids, charge_exponents, intopt): ctypes.cast(charge_exponents_pointer, ctypes.c_void_p), ctypes.c_int(ngrids_of_split), ctypes.cast(int3c_angular_slice.data.ptr, ctypes.c_void_p), - ctypes.c_int(nao_cart), strides.ctypes.data_as(ctypes.c_void_p), ao_offsets.ctypes.data_as(ctypes.c_void_p), bins_locs_ij.ctypes.data_as(ctypes.c_void_p), @@ -126,7 +124,14 @@ def get_int3c1e_ip1_charge_contracted(mol, grids, charge_exponents, charges, int omega = mol.omega assert omega >= 0.0, "Short-range one electron integrals with GPU acceleration is not implemented." - charges = cp.asarray(charges).reshape([-1, 1], order='C') + grids = cp.asarray(grids, order='C') + if charge_exponents is not None: + charge_exponents = cp.asarray(charge_exponents, order='C') + + assert charges.ndim == 1 and charges.shape[0] == grids.shape[0] + charges = cp.asarray(charges) + + charges = charges.reshape([-1, 1], order='C') grids = cp.concatenate([grids, charges], axis=1) int1e_charge_contracted = cp.zeros([3, mol.nao, mol.nao], order='C') @@ -137,7 +142,6 @@ def get_int3c1e_ip1_charge_contracted(mol, grids, charge_exponents, charges, int lj = intopt.angular[cpj] stream = cp.cuda.get_current_stream() - nao_cart = intopt._sorted_mol.nao log_q_ij = intopt.log_qs[cp_ij_id] @@ -170,7 +174,6 @@ def get_int3c1e_ip1_charge_contracted(mol, grids, charge_exponents, charges, int ctypes.cast(charge_exponents_pointer, ctypes.c_void_p), ctypes.c_int(ngrids), ctypes.cast(int1e_angular_slice.data.ptr, ctypes.c_void_p), - ctypes.c_int(nao_cart), strides.ctypes.data_as(ctypes.c_void_p), ao_offsets.ctypes.data_as(ctypes.c_void_p), bins_locs_ij.ctypes.data_as(ctypes.c_void_p), @@ -279,7 +282,7 @@ def get_int3c1e_ip2_density_contracted(mol, grids, charge_exponents, dm, intopt) return int3c_density_contracted -def get_int3c1e_ip_contracted(mol, grids, charge_exponents, dm, charges, intopt): +def get_int3c1e_ip_contracted(mol, grids, charge_exponents, dm, charges, intopt, if_ip1, if_ip2): dm = cp.asarray(dm) if dm.ndim == 3: if dm.shape[0] > 2: @@ -297,10 +300,16 @@ def get_int3c1e_ip_contracted(mol, grids, charge_exponents, dm, charges, intopt) assert charges.ndim == 1 and charges.shape[0] == grids.shape[0] charges = cp.asarray(charges) - int3c_ip2 = get_int3c1e_ip2_density_contracted(mol, grids, charge_exponents, dm, intopt) - int3c_ip2 = int3c_ip2 * charges + if if_ip1: + int3c_ip1 = get_int3c1e_ip1_charge_contracted(mol, grids, charge_exponents, charges, intopt) + int3c_ip1 = cp.einsum('xji,ij->xi', int3c_ip1, dm) + if not if_ip2: + return int3c_ip1 - int3c_ip1 = get_int3c1e_ip1_charge_contracted(mol, grids, charge_exponents, charges, intopt) - int3c_ip1 = cp.einsum('xji,ij->xi', int3c_ip1, dm) + if if_ip2: + int3c_ip2 = get_int3c1e_ip2_density_contracted(mol, grids, charge_exponents, dm, intopt) + int3c_ip2 = int3c_ip2 * charges + if not if_ip1: + return int3c_ip2 - return int3c_ip1, int3c_ip2 \ No newline at end of file + return int3c_ip1, int3c_ip2 diff --git a/gpu4pyscf/gto/moleintor.py b/gpu4pyscf/gto/moleintor.py index 3c1ad6fb..1dc949fd 100644 --- a/gpu4pyscf/gto/moleintor.py +++ b/gpu4pyscf/gto/moleintor.py @@ -18,7 +18,7 @@ import numpy as np from gpu4pyscf.gto.int3c1e import VHFOpt, get_int3c1e, get_int3c1e_density_contracted, get_int3c1e_charge_contracted -from gpu4pyscf.gto.int3c1e_ip import get_int3c1e_ip, get_int3c1e_ip_contracted +from gpu4pyscf.gto.int3c1e_ip import get_int3c1e_ip, get_int3c1e_ip_contracted, get_int3c1e_ip1_charge_contracted def intor(mol, intor, grids, charge_exponents=None, dm=None, charges=None, direct_scf_tol=1e-13, intopt=None): assert grids is not None @@ -54,6 +54,18 @@ def intor(mol, intor, grids, charge_exponents=None, dm=None, charges=None, direc else: assert dm is not None assert charges is not None - return get_int3c1e_ip_contracted(mol, grids, charge_exponents, dm, charges, intopt) + return get_int3c1e_ip_contracted(mol, grids, charge_exponents, dm, charges, intopt, True, True) + elif intor == 'int1e_grids_ip1': + assert not intopt.aosym + assert charges is not None + if dm is not None: + return get_int3c1e_ip_contracted(mol, grids, charge_exponents, dm, charges, intopt, True, False) + else: + return get_int3c1e_ip1_charge_contracted(mol, grids, charge_exponents, charges, intopt) + elif intor == 'int1e_grids_ip2': + assert not intopt.aosym + assert dm is not None + assert charges is not None + return get_int3c1e_ip_contracted(mol, grids, charge_exponents, dm, charges, intopt, False, True) else: raise NotImplementedError(f"GPU intor {intor} is not implemented.") diff --git a/gpu4pyscf/lib/gint/nr_fill_ao_int3c1e.cu b/gpu4pyscf/lib/gint/nr_fill_ao_int3c1e.cu index 87eeb87e..db342037 100644 --- a/gpu4pyscf/lib/gint/nr_fill_ao_int3c1e.cu +++ b/gpu4pyscf/lib/gint/nr_fill_ao_int3c1e.cu @@ -147,7 +147,7 @@ static int GINTfill_int3c1e_density_contracted_tasks(double* output, const doubl extern "C" { int GINTfill_int3c1e(const cudaStream_t stream, const BasisProdCache* bpcache, const double* grid_points, const double* charge_exponents, const int ngrids, - double* integrals, const int nao, + double* integrals, const int* strides, const int* ao_offsets, const int* bins_locs_ij, int nbins, const int cp_ij_id, const double omega) @@ -197,7 +197,7 @@ int GINTfill_int3c1e(const cudaStream_t stream, const BasisProdCache* bpcache, int GINTfill_int3c1e_charge_contracted(const cudaStream_t stream, const BasisProdCache* bpcache, const double* grid_points, const double* charge_exponents, const int ngrids, - double* integral_charge_contracted, const int nao, + double* integral_charge_contracted, const int* strides, const int* ao_offsets, const int* bins_locs_ij, int nbins, const int cp_ij_id, const double omega, const int n_charge_sum_per_thread) diff --git a/gpu4pyscf/lib/gint/nr_fill_ao_int3c1e_ip.cu b/gpu4pyscf/lib/gint/nr_fill_ao_int3c1e_ip.cu index 095cf8c5..fc43c0a3 100644 --- a/gpu4pyscf/lib/gint/nr_fill_ao_int3c1e_ip.cu +++ b/gpu4pyscf/lib/gint/nr_fill_ao_int3c1e_ip.cu @@ -150,7 +150,7 @@ static int GINTfill_int3c1e_ip2_density_contracted_tasks(double* output, const d extern "C" { int GINTfill_int3c1e_ip(const cudaStream_t stream, const BasisProdCache* bpcache, const double* grid_points, const double* charge_exponents, const int ngrids, - double* integrals, const int nao, + double* integrals, const int* strides, const int* ao_offsets, const int* bins_locs_ij, int nbins, const int cp_ij_id, const double omega) @@ -200,7 +200,7 @@ int GINTfill_int3c1e_ip(const cudaStream_t stream, const BasisProdCache* bpcache int GINTfill_int3c1e_ip1_charge_contracted(const cudaStream_t stream, const BasisProdCache* bpcache, const double* grid_points, const double* charge_exponents, const int ngrids, - double* integral_charge_contracted, const int nao, + double* integral_charge_contracted, const int* strides, const int* ao_offsets, const int* bins_locs_ij, int nbins, const int cp_ij_id, const double omega, const int n_charge_sum_per_thread) diff --git a/gpu4pyscf/qmmm/pbc/itrf.py b/gpu4pyscf/qmmm/pbc/itrf.py index 986ae2f2..d6084bfe 100644 --- a/gpu4pyscf/qmmm/pbc/itrf.py +++ b/gpu4pyscf/qmmm/pbc/itrf.py @@ -26,10 +26,9 @@ import cupy as cp from gpu4pyscf import scf from gpu4pyscf.qmmm.pbc import mm_mole -from gpu4pyscf.df import int3c2e -from gpu4pyscf.df.df import ALIGNED, MIN_BLK_SIZE from gpu4pyscf.lib import cupy_helper from gpu4pyscf.qmmm.pbc.tools import get_multipole_tensors_pp, get_multipole_tensors_pg +from gpu4pyscf.gto.moleintor import intor as gpu_intor contract = cupy_helper.contract @@ -210,14 +209,7 @@ def get_hcore(self, mol=None): logger.note(self, '%d MM charges see directly QM density'%charges.shape[0]) if mm_mol.charge_model == 'gaussian' and len(coords) != 0: expnts = cp.hstack([mm_mol.get_zetas()] * len(Ls))[mask] - # FIXME slice mm coords when memory not enough - fakemol = gto.fakemol_for_charges(coords.get(), expnts.get()) - - intopt = int3c2e.VHFOpt(mol, fakemol, 'int2e') - intopt.build(self.direct_scf_tol, diag_block_with_triu=False, aosym=True, - group_size=int3c2e.BLKSIZE, group_size_aux=int3c2e.BLKSIZE) - h1e += int3c2e.get_j_int3c2e_pass2(intopt, -charges) - intopt = None + h1e += gpu_intor(mol, "int1e_grids", coords, charges = -charges, charge_exponents = expnts) elif mm_mol.charge_model != 'point' and len(coords) != 0: # TODO test this block raise RuntimeError("Not tested yet") @@ -1017,18 +1009,9 @@ def calculate_h1e(self, h1_gpu): nao = mol.nao if mm_mol.charge_model == 'gaussian' and len(coords) != 0: expnts = cp.hstack([mm_mol.get_zetas()] * len(Ls))[mask] - fakemol = gto.fakemol_for_charges(coords.get(), expnts.get()) - - intopt = int3c2e.VHFOpt(mol, fakemol, 'int2e') - intopt.build(self.base.direct_scf_tol, diag_block_with_triu=True, aosym=False, - group_size=int3c2e.BLKSIZE, group_size_aux=int3c2e.BLKSIZE) - - v = cp.zeros_like(g_qm) - for i0,i1,j0,j1,k0,k1,j3c in int3c2e.loop_int3c2e_general(intopt, ip_type='ip1'): - v[:,i0:i1,j0:j1] += contract('xkji,k->xij', j3c, charges[k0:k1]) - v = intopt.unsort_orbitals(v, axis=[1,2]) - g_qm += v #cupy_helper.take_last2d(v, intopt.rev_ao_idx) + g_qm += gpu_intor(mol, "int1e_grids_ip1", coords, charges = charges, charge_exponents = expnts).transpose(0,2,1) elif mm_mol.charge_model == 'point' and len(coords) != 0: + raise RuntimeError("Not tested yet") max_memory = self.max_memory - lib.current_memory()[0] blksize = int(min(max_memory*1e6/8/nao**2/3, 200)) blksize = max(blksize, 1) @@ -1072,19 +1055,8 @@ def grad_hcore_mm(self, dm, mol=None): g = cp.zeros_like(all_coords) if len(coords) != 0: - g_ = cp.zeros_like(coords) expnts = cp.hstack([mm_mol.get_zetas()] * len(Ls))[mask] - fakemol = gto.fakemol_for_charges(coords.get(), expnts.get()) - - intopt = int3c2e.VHFOpt(mol, fakemol, 'int2e') - intopt.build(self.base.direct_scf_tol, diag_block_with_triu=True, aosym=False, - group_size=int3c2e.BLKSIZE, group_size_aux=int3c2e.BLKSIZE) - - dm_ = intopt.sort_orbitals(dm, axis=[0,1]) - for i0,i1,j0,j1,k0,k1,j3c in int3c2e.loop_int3c2e_general(intopt, ip_type='ip2'): - j3c = contract('xkji,k->xkji', j3c, charges[k0:k1]) - g_[k0:k1] += contract('xkji,ij->kx', j3c, dm_[i0:i1,j0:j1]) - g[mask] = g_ + g[mask] = gpu_intor(mol, "int1e_grids_ip2", coords, dm = dm, charges = charges, charge_exponents = expnts).T g = g.reshape(len(Ls), -1, 3) g = np.sum(g, axis=0) diff --git a/gpu4pyscf/solvent/grad/pcm.py b/gpu4pyscf/solvent/grad/pcm.py index f7e63a23..78758680 100644 --- a/gpu4pyscf/solvent/grad/pcm.py +++ b/gpu4pyscf/solvent/grad/pcm.py @@ -27,8 +27,8 @@ from pyscf.grad import rhf as rhf_grad from gpu4pyscf.solvent.pcm import PI, switch_h, libsolvent -from gpu4pyscf.df import int3c2e -from gpu4pyscf.lib.cupy_helper import contract, load_library +from gpu4pyscf.gto.moleintor import intor +from gpu4pyscf.lib.cupy_helper import contract from gpu4pyscf.lib import logger from pyscf import lib as pyscf_lib @@ -240,24 +240,11 @@ def grad_qv(pcmobj, dm): grid_coords = pcmobj.surface['grid_coords'] q_sym = pcmobj._intermediates['q_sym'] - auxmol = gto.fakemol_for_charges(grid_coords.get(), expnt=charge_exp.get()**2) - intopt = int3c2e.VHFOpt(mol, auxmol, 'int2e') - intopt.build(1e-14, diag_block_with_triu=True, aosym=False) - coeff = intopt.coeff - dm_cart = coeff @ dm @ coeff.T - - dvj, _ = int3c2e.get_int3c2e_ip_jk(intopt, 0, 'ip1', q_sym, None, dm_cart) - dq, _ = int3c2e.get_int3c2e_ip_jk(intopt, 0, 'ip2', q_sym, None, dm_cart) - - _sorted_mol = intopt._sorted_mol - nao_cart = _sorted_mol.nao - natm = _sorted_mol.natm - ao2atom = numpy.zeros([nao_cart, natm]) - ao_loc = _sorted_mol.ao_loc - for ibas, iatm in enumerate(_sorted_mol._bas[:,gto.ATOM_OF]): - ao2atom[ao_loc[ibas]:ao_loc[ibas+1],iatm] = 1 - ao2atom = cupy.asarray(ao2atom) - dvj = 2.0 * ao2atom.T @ dvj.T + dvj, dq = intor(mol, "int1e_grids_ip", grid_coords, dm = dm, charges = q_sym, direct_scf_tol = 1e-14, charge_exponents = charge_exp**2) + + aoslice = mol.aoslice_by_atom() + aoslice = cupy.array(aoslice) + dvj = 2.0 * cupy.asarray([cupy.sum(dvj[:,p0:p1], axis=1) for p0,p1 in aoslice[:,2:]]) dq = cupy.asarray([cupy.sum(dq[:,p0:p1], axis=1) for p0,p1 in gridslice]) de = dq + dvj t1 = log.timer_debug1('grad qv', *t1) diff --git a/gpu4pyscf/solvent/pcm.py b/gpu4pyscf/solvent/pcm.py index ef8c0749..9f902b6b 100644 --- a/gpu4pyscf/solvent/pcm.py +++ b/gpu4pyscf/solvent/pcm.py @@ -27,7 +27,8 @@ from pyscf.data import radii from pyscf.solvent import ddcosmo from gpu4pyscf.solvent import _attach_solvent -from gpu4pyscf.df import int3c2e +from gpu4pyscf.gto import int3c1e +from gpu4pyscf.gto.moleintor import intor from gpu4pyscf.lib import logger from gpu4pyscf.lib.cupy_helper import dist_matrix, load_library @@ -346,14 +347,14 @@ def build(self, ng=None): atom_coords = mol.atom_coords(unit='B') atom_charges = mol.atom_charges() - auxmol = gto.fakemol_for_charges(grid_coords.get(), expnt=charge_exp.get()**2) - intopt = int3c2e.VHFOpt(mol, auxmol, 'int2e') - intopt.build(1e-14, diag_block_with_triu=False, aosym=True, group_size=256) + intopt = int3c1e.VHFOpt(mol) + intopt.build(1e-14) self.intopt = intopt int2c2e = mol._add_suffix('int2c2e') + fakemol_charge = gto.fakemol_for_charges(grid_coords.get(), expnt=charge_exp.get()**2) fakemol_nuc = gto.fakemol_for_charges(atom_coords) - v_ng = gto.mole.intor_cross(int2c2e, fakemol_nuc, auxmol) + v_ng = gto.mole.intor_cross(int2c2e, fakemol_nuc, fakemol_charge) v_grids_n = numpy.dot(atom_charges, v_ng) self.v_grids_n = cupy.asarray(v_grids_n) @@ -393,19 +394,23 @@ def _get_v(self, dms): return electrostatic potential on surface ''' nset = dms.shape[0] - ngrids = self.surface['grid_coords'].shape[0] + charge_exp = self.surface['charge_exp'] + grid_coords = self.surface['grid_coords'] + ngrids = grid_coords.shape[0] v_grids_e = cupy.empty([nset, ngrids]) for i in range(nset): - v_grids_e[i] = 2.0*int3c2e.get_j_int3c2e_pass1(self.intopt, dms[i]) + v_grids_e[i] = intor(self.mol, "int1e_grids", grid_coords, dm = dms[i], charge_exponents = charge_exp**2, intopt = self.intopt) return v_grids_e def _get_vmat(self, q): assert q.ndim == 2 nset = q.shape[0] nao = self.mol.nao + charge_exp = self.surface['charge_exp'] + grid_coords = self.surface['grid_coords'] vmat = cupy.empty([nset, nao, nao]) for i in range(nset): - vmat[i] = -int3c2e.get_j_int3c2e_pass2(self.intopt, q[i]) + vmat[i] = -intor(self.mol, "int1e_grids", grid_coords, charges = q[i], charge_exponents = charge_exp**2, intopt = self.intopt) return vmat def nuc_grad_method(self, grad_method): diff --git a/gpu4pyscf/solvent/smd.py b/gpu4pyscf/solvent/smd.py index 1d599065..10ba211d 100644 --- a/gpu4pyscf/solvent/smd.py +++ b/gpu4pyscf/solvent/smd.py @@ -24,7 +24,7 @@ from pyscf.dft import gen_grid from gpu4pyscf.solvent import pcm, _attach_solvent from gpu4pyscf.lib import logger -from gpu4pyscf.df import int3c2e +from gpu4pyscf.gto import int3c1e @lib.with_doc(_attach_solvent._for_scf.__doc__) def smd_for_scf(mf, solvent_obj=None, dm=None): @@ -398,14 +398,14 @@ def build(self, ng=None): atom_charges = mol.atom_charges() # Move this to GPU - auxmol = gto.fakemol_for_charges(grid_coords.get(), expnt=charge_exp.get()**2) - intopt = int3c2e.VHFOpt(mol, auxmol, 'int2e') - intopt.build(1e-14, diag_block_with_triu=False, aosym=True, group_size=256) + intopt = int3c1e.VHFOpt(mol) + intopt.build(1e-14) self.intopt = intopt int2c2e = mol._add_suffix('int2c2e') + fakemol_charge = gto.fakemol_for_charges(grid_coords.get(), expnt=charge_exp.get()**2) fakemol_nuc = gto.fakemol_for_charges(atom_coords) - v_ng = gto.mole.intor_cross(int2c2e, fakemol_nuc, auxmol) + v_ng = gto.mole.intor_cross(int2c2e, fakemol_nuc, fakemol_charge) v_grids_n = np.dot(atom_charges, v_ng) self.v_grids_n = cupy.asarray(v_grids_n)