Skip to content

Commit

Permalink
added interaction dataset docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
mcneela committed Mar 8, 2024
1 parent ad55296 commit 0a51e7c
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 8 deletions.
12 changes: 12 additions & 0 deletions openqdc/datasets/interaction/L7.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@


class L7(BaseInteractionDataset):
"""
The L7 interaction energy dataset as described in:
Accuracy of Quantum Chemical Methods for Large Noncovalent Complexes
Robert Sedlak, Tomasz Janowski, Michal Pitoňák, Jan Řezáč, Peter Pulay, and Pavel Hobza
Journal of Chemical Theory and Computation 2013 9 (8), 3364-3374
DOI: 10.1021/ct400036b
Data was downloaded and extracted from:
http://cuby4.molecular.cz/dataset_l7.html
"""

__name__ = "L7"
__energy_unit__ = "hartree"
__distance_unit__ = "ang"
Expand Down
13 changes: 13 additions & 0 deletions openqdc/datasets/interaction/X40.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@


class X40(BaseInteractionDataset):
"""
X40 interaction dataset of 40 dimer pairs as
introduced in the following paper:
Benchmark Calculations of Noncovalent Interactions of Halogenated Molecules
Jan Řezáč, Kevin E. Riley, and Pavel Hobza
Journal of Chemical Theory and Computation 2012 8 (11), 4285-4292
DOI: 10.1021/ct300647k
Dataset retrieved and processed from:
http://cuby4.molecular.cz/dataset_x40.html
"""

__name__ = "X40"
__energy_unit__ = "hartree"
__distance_unit__ = "ang"
Expand Down
10 changes: 10 additions & 0 deletions openqdc/datasets/interaction/des370k.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@


class DES370K(BaseInteractionDataset):
"""
DE Shaw Research interaction energies for over 370K
small molecule dimers as described in the paper:
Quantum chemical benchmark databases of gold-standard dimer interaction energies.
Donchev, A.G., Taube, A.G., Decolvenaere, E. et al.
Sci Data 8, 55 (2021).
https://doi.org/10.1038/s41597-021-00833-x
"""

__name__ = "des370k_interaction"
__energy_unit__ = "hartree"
__distance_unit__ = "ang"
Expand Down
10 changes: 10 additions & 0 deletions openqdc/datasets/interaction/des5m.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@


class DES5M(DES370K):
"""
DE Shaw Research interaction energy calculations for
over 5M small molecule dimers as described in the paper:
Quantum chemical benchmark databases of gold-standard dimer interaction energies.
Donchev, A.G., Taube, A.G., Decolvenaere, E. et al.
Sci Data 8, 55 (2021).
https://doi.org/10.1038/s41597-021-00833-x
"""

__name__ = "des5m_interaction"
__energy_methods__ = [
"mp2/cc-pvqz",
Expand Down
15 changes: 15 additions & 0 deletions openqdc/datasets/interaction/dess66.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,21 @@


class DESS66(BaseInteractionDataset):
"""
DE Shaw Research interaction energy
estimates of all 66 conformers from
the original S66 dataset as described
in the paper:
Quantum chemical benchmark databases of gold-standard dimer interaction energies.
Donchev, A.G., Taube, A.G., Decolvenaere, E. et al.
Sci Data 8, 55 (2021).
https://doi.org/10.1038/s41597-021-00833-x
Data was downloaded from Zenodo:
https://zenodo.org/records/5676284
"""

__name__ = "des_s66"
__energy_unit__ = "hartree"
__distance_unit__ = "ang"
Expand Down
16 changes: 16 additions & 0 deletions openqdc/datasets/interaction/dess66x8.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,22 @@


class DESS66x8(BaseInteractionDataset):
"""
DE Shaw Research interaction energy
estimates of all 528 conformers from
the original S66x8 dataset as described
in the paper:
Quantum chemical benchmark databases of gold-standard dimer interaction energies.
Donchev, A.G., Taube, A.G., Decolvenaere, E. et al.
Sci Data 8, 55 (2021).
https://doi.org/10.1038/s41597-021-00833-x
Data was downloaded from Zenodo:
https://zenodo.org/records/5676284
"""

__name__ = "des_s66x8"
__energy_unit__ = "hartree"
__distance_unit__ = "ang"
Expand Down
25 changes: 17 additions & 8 deletions openqdc/datasets/interaction/metcalf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,23 @@


class Metcalf(BaseInteractionDataset):
"""
Hydrogen-bonded dimers of NMA with 126 molecules as described in:
Approaches for machine learning intermolecular interaction energies and
application to energy components from symmetry adapted perturbation theory.
Derek P. Metcalf, Alexios Koutsoukas, Steven A. Spronk, Brian L. Claus,
Deborah A. Loughney, Stephen R. Johnson, Daniel L. Cheney, C. David Sherrill;
J. Chem. Phys. 21 February 2020; 152 (7): 074103.
https://doi.org/10.1063/1.5142636
Further details:
"Hydrogen-bonded dimers involving N-methylacetamide (NMA) and 126 molecules
(46 donors and 80 acceptors; Figs. 2 and 3) were used. Optimized geometries
for the 126 individual monomers were obtained and paired with NMA in broad
arrays of spatial configurations to generate thousands of complexes for training."
"""

__name__ = "metcalf"
__energy_unit__ = "hartree"
__distance_unit__ = "ang"
Expand All @@ -22,14 +39,6 @@ class Metcalf(BaseInteractionDataset):
]

def read_raw_entries(self) -> List[Dict]:
"""
SMILES strings are inferred from the
.xyz files using the RDKit xyz2mol function.
More details here:
https://github.com/jensengroup/xyz2mol
"""
data = []
for dirname in os.listdir(self.root):
xyz_dir = os.path.join(self.root, dirname)
Expand Down
10 changes: 10 additions & 0 deletions openqdc/datasets/interaction/splinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@


class Splinter(BaseInteractionDataset):
"""
A dataset of over 1.7 million protein-ligand
interactions as described in the paper:
A quantum chemical interaction energy dataset for accurately modeling protein-ligand interactions.
Spronk, S.A., Glick, Z.L., Metcalf, D.P. et al.
Sci Data 10, 619 (2023).
https://doi.org/10.1038/s41597-023-02443-1
"""

__name__ = "splinter"
__energy_methods__ = [
"sapt0/jun-cc-pV(D+d)Z_unscaled",
Expand Down

0 comments on commit 0a51e7c

Please sign in to comment.