From 0a51e7c2fe43341b85f12187158bbbe6ae7d084e Mon Sep 17 00:00:00 2001 From: mcneela Date: Fri, 8 Mar 2024 11:24:24 -0500 Subject: [PATCH] added interaction dataset docstrings --- openqdc/datasets/interaction/L7.py | 12 ++++++++++++ openqdc/datasets/interaction/X40.py | 13 ++++++++++++ openqdc/datasets/interaction/des370k.py | 10 ++++++++++ openqdc/datasets/interaction/des5m.py | 10 ++++++++++ openqdc/datasets/interaction/dess66.py | 15 ++++++++++++++ openqdc/datasets/interaction/dess66x8.py | 16 +++++++++++++++ openqdc/datasets/interaction/metcalf.py | 25 ++++++++++++++++-------- openqdc/datasets/interaction/splinter.py | 10 ++++++++++ 8 files changed, 103 insertions(+), 8 deletions(-) diff --git a/openqdc/datasets/interaction/L7.py b/openqdc/datasets/interaction/L7.py index db6fa3c..8c76c03 100644 --- a/openqdc/datasets/interaction/L7.py +++ b/openqdc/datasets/interaction/L7.py @@ -10,6 +10,18 @@ class L7(BaseInteractionDataset): + """ + The L7 interaction energy dataset as described in: + + Accuracy of Quantum Chemical Methods for Large Noncovalent Complexes + Robert Sedlak, Tomasz Janowski, Michal Pitoňák, Jan Řezáč, Peter Pulay, and Pavel Hobza + Journal of Chemical Theory and Computation 2013 9 (8), 3364-3374 + DOI: 10.1021/ct400036b + + Data was downloaded and extracted from: + http://cuby4.molecular.cz/dataset_l7.html + """ + __name__ = "L7" __energy_unit__ = "hartree" __distance_unit__ = "ang" diff --git a/openqdc/datasets/interaction/X40.py b/openqdc/datasets/interaction/X40.py index 09d65aa..f38f22a 100644 --- a/openqdc/datasets/interaction/X40.py +++ b/openqdc/datasets/interaction/X40.py @@ -10,6 +10,19 @@ class X40(BaseInteractionDataset): + """ + X40 interaction dataset of 40 dimer pairs as + introduced in the following paper: + + Benchmark Calculations of Noncovalent Interactions of Halogenated Molecules + Jan Řezáč, Kevin E. 
Riley, and Pavel Hobza + Journal of Chemical Theory and Computation 2012 8 (11), 4285-4292 + DOI: 10.1021/ct300647k + + Dataset retrieved and processed from: + http://cuby4.molecular.cz/dataset_x40.html + """ + __name__ = "X40" __energy_unit__ = "hartree" __distance_unit__ = "ang" diff --git a/openqdc/datasets/interaction/des370k.py b/openqdc/datasets/interaction/des370k.py index 9cbf736..b98b296 100644 --- a/openqdc/datasets/interaction/des370k.py +++ b/openqdc/datasets/interaction/des370k.py @@ -11,6 +11,16 @@ class DES370K(BaseInteractionDataset): + """ + DE Shaw Research interaction energy calculations for over 370K + small molecule dimers as described in the paper: + + Quantum chemical benchmark databases of gold-standard dimer interaction energies. + Donchev, A.G., Taube, A.G., Decolvenaere, E. et al. + Sci Data 8, 55 (2021). + https://doi.org/10.1038/s41597-021-00833-x + """ + __name__ = "des370k_interaction" __energy_unit__ = "hartree" __distance_unit__ = "ang" diff --git a/openqdc/datasets/interaction/des5m.py b/openqdc/datasets/interaction/des5m.py index 256df0b..40e2ca1 100644 --- a/openqdc/datasets/interaction/des5m.py +++ b/openqdc/datasets/interaction/des5m.py @@ -11,6 +11,16 @@ class DES5M(DES370K): + """ + DE Shaw Research interaction energy calculations for + over 5M small molecule dimers as described in the paper: + + Quantum chemical benchmark databases of gold-standard dimer interaction energies. + Donchev, A.G., Taube, A.G., Decolvenaere, E. et al. + Sci Data 8, 55 (2021). 
+ https://doi.org/10.1038/s41597-021-00833-x + """ + __name__ = "des5m_interaction" __energy_methods__ = [ "mp2/cc-pvqz", diff --git a/openqdc/datasets/interaction/dess66.py b/openqdc/datasets/interaction/dess66.py index a0d361b..7b53057 100644 --- a/openqdc/datasets/interaction/dess66.py +++ b/openqdc/datasets/interaction/dess66.py @@ -11,6 +11,21 @@ class DESS66(BaseInteractionDataset): + """ + DE Shaw Research interaction energy + estimates of all 66 conformers from + the original S66 dataset as described + in the paper: + + Quantum chemical benchmark databases of gold-standard dimer interaction energies. + Donchev, A.G., Taube, A.G., Decolvenaere, E. et al. + Sci Data 8, 55 (2021). + https://doi.org/10.1038/s41597-021-00833-x + + Data was downloaded from Zenodo: + https://zenodo.org/records/5676284 + """ + __name__ = "des_s66" __energy_unit__ = "hartree" __distance_unit__ = "ang" diff --git a/openqdc/datasets/interaction/dess66x8.py b/openqdc/datasets/interaction/dess66x8.py index 575d601..1291c7a 100644 --- a/openqdc/datasets/interaction/dess66x8.py +++ b/openqdc/datasets/interaction/dess66x8.py @@ -11,6 +11,22 @@ class DESS66x8(BaseInteractionDataset): + """ + DE Shaw Research interaction energy + estimates of all 528 conformers from + the original S66x8 dataset as described + in the paper: + + Quantum chemical benchmark databases of gold-standard dimer interaction energies. + Donchev, A.G., Taube, A.G., Decolvenaere, E. et al. + Sci Data 8, 55 (2021). 
+ https://doi.org/10.1038/s41597-021-00833-x + + Data was downloaded from Zenodo: + + https://zenodo.org/records/5676284 + """ + __name__ = "des_s66x8" __energy_unit__ = "hartree" __distance_unit__ = "ang" diff --git a/openqdc/datasets/interaction/metcalf.py b/openqdc/datasets/interaction/metcalf.py index 3553d90..caddc8f 100644 --- a/openqdc/datasets/interaction/metcalf.py +++ b/openqdc/datasets/interaction/metcalf.py @@ -8,6 +8,23 @@ class Metcalf(BaseInteractionDataset): + """ + Hydrogen-bonded dimers of NMA with 126 molecules as described in: + + Approaches for machine learning intermolecular interaction energies and + application to energy components from symmetry adapted perturbation theory. + Derek P. Metcalf, Alexios Koutsoukas, Steven A. Spronk, Brian L. Claus, + Deborah A. Loughney, Stephen R. Johnson, Daniel L. Cheney, C. David Sherrill; + J. Chem. Phys. 21 February 2020; 152 (7): 074103. + https://doi.org/10.1063/1.5142636 + + Further details: + "Hydrogen-bonded dimers involving N-methylacetamide (NMA) and 126 molecules + (46 donors and 80 acceptors; Figs. 2 and 3) were used. Optimized geometries + for the 126 individual monomers were obtained and paired with NMA in broad + arrays of spatial configurations to generate thousands of complexes for training." + """ + __name__ = "metcalf" __energy_unit__ = "hartree" __distance_unit__ = "ang" @@ -22,14 +39,6 @@ class Metcalf(BaseInteractionDataset): ] def read_raw_entries(self) -> List[Dict]: - """ - SMILES strings are inferred from the - .xyz files using the RDKit xyz2mol function. 
- More details here: - - https://github.com/jensengroup/xyz2mol - - """ data = [] for dirname in os.listdir(self.root): xyz_dir = os.path.join(self.root, dirname) diff --git a/openqdc/datasets/interaction/splinter.py b/openqdc/datasets/interaction/splinter.py index 2841cf7..25611c2 100644 --- a/openqdc/datasets/interaction/splinter.py +++ b/openqdc/datasets/interaction/splinter.py @@ -10,6 +10,16 @@ class Splinter(BaseInteractionDataset): + """ + A dataset of over 1.7 million protein-ligand + interactions as described in the paper: + + A quantum chemical interaction energy dataset for accurately modeling protein-ligand interactions. + Spronk, S.A., Glick, Z.L., Metcalf, D.P. et al. + Sci Data 10, 619 (2023). + https://doi.org/10.1038/s41597-023-02443-1 + """ + __name__ = "splinter" __energy_methods__ = [ "sapt0/jun-cc-pV(D+d)Z_unscaled",