From 3742529927541b84275e4e13dff3f36407eb2990 Mon Sep 17 00:00:00 2001 From: Corin Wagen Date: Tue, 18 Oct 2022 15:47:06 -0400 Subject: [PATCH] add rotational initialization --- build/lib/cctk/__init__.py | 18 + build/lib/cctk/array.py | 135 ++ build/lib/cctk/data/__init__.py | 0 build/lib/cctk/data/covalent_radii.csv | 97 ++ build/lib/cctk/data/isotopes.csv | 355 +++++ build/lib/cctk/data/vdw_radii.csv | 76 + build/lib/cctk/ensemble.py | 663 +++++++++ build/lib/cctk/file.py | 81 ++ build/lib/cctk/gaussian_file.py | 757 ++++++++++ build/lib/cctk/group.py | 277 ++++ build/lib/cctk/groups/AcH.mol2 | 29 + build/lib/cctk/groups/BrH.mol2 | 19 + build/lib/cctk/groups/CF3H.mol2 | 25 + build/lib/cctk/groups/CHOH.mol2 | 23 + build/lib/cctk/groups/ClH.mol2 | 19 + build/lib/cctk/groups/EtH.mol2 | 31 + build/lib/cctk/groups/FH.mol2 | 19 + build/lib/cctk/groups/HCN.mol2 | 21 + build/lib/cctk/groups/HCO2Me.mol2 | 31 + build/lib/cctk/groups/HNO2.mol2 | 23 + build/lib/cctk/groups/IH.mol2 | 19 + build/lib/cctk/groups/MeH.mol2 | 25 + build/lib/cctk/groups/NH3.mol2 | 23 + build/lib/cctk/groups/NHAcH.mol2 | 33 + build/lib/cctk/groups/NMe2H.mol2 | 35 + build/lib/cctk/groups/OH2.mol2 | 21 + build/lib/cctk/groups/OMeH.mol2 | 27 + build/lib/cctk/groups/SF5H.mol2 | 29 + build/lib/cctk/groups/SO3HH.mol2 | 27 + build/lib/cctk/groups/__init__.py | 0 build/lib/cctk/groups/iPrH.mol2 | 37 + build/lib/cctk/groups/tBuH.mol2 | 43 + build/lib/cctk/helper_functions.py | 708 +++++++++ build/lib/cctk/lines.py | 163 +++ build/lib/cctk/load_groups.py | 109 ++ build/lib/cctk/mae_file.py | 278 ++++ build/lib/cctk/mol2_file.py | 351 +++++ build/lib/cctk/molecule.py | 1832 ++++++++++++++++++++++++ build/lib/cctk/optimize.py | 181 +++ build/lib/cctk/orca_file.py | 375 +++++ build/lib/cctk/parse_gaussian.py | 768 ++++++++++ build/lib/cctk/parse_orca.py | 220 +++ build/lib/cctk/pdb_file.py | 56 + build/lib/cctk/point_charge.py | 18 + build/lib/cctk/quasiclassical.py | 214 +++ build/lib/cctk/si_file.py | 89 ++ 
build/lib/cctk/topology.py | 267 ++++ build/lib/cctk/vibrational_mode.py | 217 +++ build/lib/cctk/xyz_file.py | 190 +++ cctk/molecule.py | 43 + cctk/quasiclassical.py | 37 +- setup.py | 5 +- test/static/h2.xyz | 4 + test/test_freqs.py | 4 +- test/test_molecule.py | 12 + 55 files changed, 9153 insertions(+), 6 deletions(-) create mode 100644 build/lib/cctk/__init__.py create mode 100644 build/lib/cctk/array.py create mode 100644 build/lib/cctk/data/__init__.py create mode 100644 build/lib/cctk/data/covalent_radii.csv create mode 100644 build/lib/cctk/data/isotopes.csv create mode 100644 build/lib/cctk/data/vdw_radii.csv create mode 100644 build/lib/cctk/ensemble.py create mode 100644 build/lib/cctk/file.py create mode 100644 build/lib/cctk/gaussian_file.py create mode 100644 build/lib/cctk/group.py create mode 100644 build/lib/cctk/groups/AcH.mol2 create mode 100644 build/lib/cctk/groups/BrH.mol2 create mode 100644 build/lib/cctk/groups/CF3H.mol2 create mode 100644 build/lib/cctk/groups/CHOH.mol2 create mode 100644 build/lib/cctk/groups/ClH.mol2 create mode 100644 build/lib/cctk/groups/EtH.mol2 create mode 100644 build/lib/cctk/groups/FH.mol2 create mode 100644 build/lib/cctk/groups/HCN.mol2 create mode 100644 build/lib/cctk/groups/HCO2Me.mol2 create mode 100644 build/lib/cctk/groups/HNO2.mol2 create mode 100644 build/lib/cctk/groups/IH.mol2 create mode 100644 build/lib/cctk/groups/MeH.mol2 create mode 100644 build/lib/cctk/groups/NH3.mol2 create mode 100644 build/lib/cctk/groups/NHAcH.mol2 create mode 100644 build/lib/cctk/groups/NMe2H.mol2 create mode 100644 build/lib/cctk/groups/OH2.mol2 create mode 100644 build/lib/cctk/groups/OMeH.mol2 create mode 100644 build/lib/cctk/groups/SF5H.mol2 create mode 100644 build/lib/cctk/groups/SO3HH.mol2 create mode 100644 build/lib/cctk/groups/__init__.py create mode 100644 build/lib/cctk/groups/iPrH.mol2 create mode 100644 build/lib/cctk/groups/tBuH.mol2 create mode 100644 build/lib/cctk/helper_functions.py create mode 100644 
build/lib/cctk/lines.py create mode 100644 build/lib/cctk/load_groups.py create mode 100644 build/lib/cctk/mae_file.py create mode 100644 build/lib/cctk/mol2_file.py create mode 100644 build/lib/cctk/molecule.py create mode 100644 build/lib/cctk/optimize.py create mode 100644 build/lib/cctk/orca_file.py create mode 100644 build/lib/cctk/parse_gaussian.py create mode 100644 build/lib/cctk/parse_orca.py create mode 100644 build/lib/cctk/pdb_file.py create mode 100644 build/lib/cctk/point_charge.py create mode 100644 build/lib/cctk/quasiclassical.py create mode 100644 build/lib/cctk/si_file.py create mode 100644 build/lib/cctk/topology.py create mode 100644 build/lib/cctk/vibrational_mode.py create mode 100644 build/lib/cctk/xyz_file.py create mode 100644 test/static/h2.xyz diff --git a/build/lib/cctk/__init__.py b/build/lib/cctk/__init__.py new file mode 100644 index 0000000..8662561 --- /dev/null +++ b/build/lib/cctk/__init__.py @@ -0,0 +1,18 @@ +from .file import File +from .lines import LazyLineObject +from .array import OneIndexedArray +from .molecule import Molecule +from .ensemble import Ensemble, ConformationalEnsemble +from .group import Group +from .vibrational_mode import VibrationalMode + +from .gaussian_file import GaussianJobType, GaussianFile +from .orca_file import OrcaFile, OrcaJobType +from .xyz_file import XYZFile +from .mol2_file import MOL2File +from .mae_file import MAEFile +from .pdb_file import PDBFile + +from .si_file import SIFile + +from .point_charge import PointCharge diff --git a/build/lib/cctk/array.py b/build/lib/cctk/array.py new file mode 100644 index 0000000..6f14df4 --- /dev/null +++ b/build/lib/cctk/array.py @@ -0,0 +1,135 @@ +import numpy as np +import copy + +class OneIndexedArray(np.ndarray): + """ + Wrapper for ``np.ndarray`` that's indexed from one, not zero, to store atomic numbers and geometries. + This only works on 1D or 2D arrays. Additionally, only the first index of a 2D array will be 1-indexed. 
+ + Note that ``enumerate(one_indexed_array)`` will throw ``IndexError`` -- instead, use ``enumerate(one_indexed_array, start=1)``. + """ + + def __new__(cls, obj, **kwargs): + new = np.array(obj, **kwargs).view(cls) + return new + + def __getitem__(self, index): + index = copy.deepcopy(index) + if isinstance(index, slice): + if index.start is None: + start = 0 + else: + start = index.start - 1 + if index.stop is None: + stop = -1 + else: + stop = index.stop - 1 + new_index = slice(start, stop, index.step) + return super().__getitem__(new_index) + elif isinstance(index, int): + if index > 0: + return super().__getitem__(index-1) + elif index == 0: + raise IndexError("this is a 1-indexed array: no element 0!") + elif index < 0: + return super().__getitem__(index) + elif (isinstance(index, tuple)) and (len(index) == 2): + if index[0] is None: + return super().__getitem__((index[0], index[1])) + elif index[0] > 0: + return super().__getitem__((index[0]-1, index[1])) + elif index[0] == 0: + raise IndexError("this is a 1-indexed array: no element 0!") + elif index[0] < 0: + return super().__getitem__((index[0], index[1])) + elif (isinstance(index, tuple)) and (len(index) == 1): + return self.__getitem__(index[0]) + elif isinstance(index, np.ndarray): + if index.dtype == bool: + return super().__getitem__(index) + elif index.ndim == 1: + index[index >= 1] += -1 + return super().__getitem__(index) + else: + index[0][index >= 1] += -1 + return super().__getitem__(index) + elif isinstance(index, list): + if isinstance(index[0], bool): + return super().__getitem__(index) + elif isinstance(index[0], list): + if isinstance(index[0][0], bool): + return super().__getitem__(index) + for i, v in enumerate(index[0]): + if v >= 1: + index[i] += -1 + return super().__getitem__(index) + else: + for i, v in enumerate(index): + if v >= 1: + index[i] += -1 + return super().__getitem__(index) + else: + return super().__getitem__(index) + + def __setitem__(self, index, value): + index = 
copy.deepcopy(index) + if isinstance(index, int): + if index > 0: + if self.ndim == 1: + super().__setitem__(index-1, value) + elif self.ndim == 2: + super().__setitem__(index, value) + else: + raise TypeError("this datatype is only defined for 1D and 2D ndarrays") + elif index == 0: + raise IndexError("this is a 1-indexed array: no element 0!") + elif index < 0: + super().__setitem__(index, value) + elif (isinstance(index, tuple)) and (len(index) == 2): + if index[0] is None: + super().__setitem__((index[0], index[1]), value) + elif index[0] > 0: + super().__setitem__((index[0]-1, index[1]), value) + elif index[0] == 0: + raise IndexError("this is a 1-indexed array: no element 0!") + elif index[0] < 0: + super().__setitem__((index[0], index[1]), value) + elif (isinstance(index, tuple)) and (len(index) == 1): + return self.__setitem__(index[0], value) + elif isinstance(index, np.ndarray): + if index.dtype == bool: + super().__setitem__(index, value) + elif index.ndim == 1: + index[index >= 1] += -1 + super().__setitem__(index, value) + else: + index[0][index >= 1] += -1 + super().__setitem__(index, value) + elif isinstance(index, list): + if isinstance(index[0], bool): + super().__setitem__(index, value) + elif isinstance(index[0], list): + if isinstance(index[0][0], bool): + super().__setitem__(index, value) + for i, v in enumerate(index[0]): + if v >= 1: + index[i] += -1 + super().__setitem__(index, value) + else: + for i, v in enumerate(index): + if v >= 1: + index[i] += -1 + super().__setitem__(index, value) + else: + super().__setitem__(index, value) +# raise IndexError(f"invalid index {index} for OneIndexedArray") + + def __iter__(self): + for idx in range(1,len(self)+1): + yield self.__getitem__(idx) + + def __hash__(self): + return hash(self.data.tobytes()) + + def __str__(self): + return self.view(np.ndarray).__str__() diff --git a/build/lib/cctk/data/__init__.py b/build/lib/cctk/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/build/lib/cctk/data/covalent_radii.csv b/build/lib/cctk/data/covalent_radii.csv new file mode 100644 index 0000000..ae3fe39 --- /dev/null +++ b/build/lib/cctk/data/covalent_radii.csv @@ -0,0 +1,97 @@ +Number,Symbol,Radius,StdDev,NumAnalyzed +1,H,0.31,5,129 +2,He,0.28 +3,Li,1.28,7,5789 +4,Be,0.96,3,310 +5,B,0.84,3,1770 +6,C,0.76,1,10,000 +7,N,0.71,1,2200 +8,O,0.66,2,10,000 +9,F,0.57,3,10,000 +10,Ne,0.58 +11,Na,1.66,9,1629 +12,Mg,1.41,7,3234 +13,Al,1.21,4,3206 +14,Si,1.11,2,10,000 +15,P,1.07,3,10,000 +16,S,1.05,3,10,000 +17,Cl,1.02,4,1987 +18,Ar,1.06,10,9 +19,K,2.03,12,435 +20,Ca,1.76,10,2647 +21,Sc,1.70,7,32 +22,Ti,1.60,8,231 +23,V,1.53,8,389 +24,Cr,1.39,5,916 +25,Mn,1.61,8,929 +26,Fe,1.52,6,1540 +27,Co,1.50,7,780 +28,Ni,1.24,4,1030 +29,Cu,1.32,4,1149 +30,Zn,1.22,4,443 +31,Ga,1.22,3,1330 +32,Ge,1.20,4,1013 +33,As,1.19,4,2015 +34,Se,1.20,4,1717 +35,Br,1.20,3,2140 +36,Kr,1.16,4,5 +37,Rb,2.20,9,23 +38,Sr,1.95,10,1500 +39,Y,1.90,7,30 +40,Zr,1.75,7,93 +41,Nb,1.64,6,18 +42,Mo,1.54,5,97 +43,Tc,1.47,7,96 +44,Ru,1.46,7,1032 +45,Rh,1.42,7,458 +46,Pd,1.39,6,1892 +47,Ag,1.45,5,1728 +48,Cd,1.44,9,19 +49,In,1.42,5,546 +50,Sn,1.39,4,2999 +51,Sb,1.39,5,609 +52,Te,1.38,4,692 +53,I,1.39,3,451 +54,Xe,1.40,9,2 +55,Cs,2.44,11,24 +56,Ba,2.15,11,3076 +57,La,2.07,8,190 +58,Ce,2.04,9,47 +59,Pr,2.03,7,58 +60,Nd,2.01,6,96 +61,Pm,1.99 +62,Sm,1.98,8,53 +63,Eu,1.98,6,167 +64,Gd,1.96,6,178 +65,Tb,1.94,5,55 +66,Dy,1.92,7,59 +67,Ho,1.92,7,48 +68,Er,1.89,6,66 +69,Tm,1.90,10,15 +70,Yb,1.87,8,122 +71,Lu,1.87,8,61 +72,Hf,1.75,10,53 +73,Ta,1.70,8,88 +74,W,1.62,7,219 +75,Re,1.51,7,476 +76,Os,1.44,4,99 +77,Ir,1.41,6,131 +78,Pt,1.36,5,1768 +79,Au,1.36,6,114 +80,Hg,1.32,5,137 +81,Tl,1.45,7,291 +82,Pb,1.46,5,112 +83,Bi,1.48,4,51 +84,Po,1.40,4,4 +85,At,1.50 +86,Rn,1.50 +87,Fr,2.60 +88,Ra,2.21,2,3 +89,Ac,2.15,1 +90,Th,2.06,6,11 +91,Pa,2.00,1 +92,U,1.96,7,57 +93,Np,1.90,1,22 +94,Pu,1.87,1,9 +95,Am,1.80,6,11 +96,Cm,1.69,3,16 diff --git a/build/lib/cctk/data/isotopes.csv b/build/lib/cctk/data/isotopes.csv new 
file mode 100644 index 0000000..5341170 --- /dev/null +++ b/build/lib/cctk/data/isotopes.csv @@ -0,0 +1,355 @@ +Symbol,Number,Mass,Abundance +H,1,1.007825,0.999885 +H,1,2.014102,0.000115 +H,1,3.016049,0.000000 +He,2,3.016029,0.000001 +He,2,4.002603,0.999999 +Li,3,6.015123,0.075900 +Li,3,7.016003,0.924100 +Be,4,9.012183,1.000000 +B,5,10.012937,0.199000 +B,5,11.009305,0.801000 +C,6,12.000000,0.989300 +C,6,13.003355,0.010700 +C,6,14.003242,0.000000 +N,7,14.003074,0.996360 +N,7,15.000109,0.003640 +O,8,15.994915,0.997570 +O,8,16.999132,0.000380 +O,8,17.999160,0.002050 +F,9,18.998403,1.000000 +Ne,10,19.992440,0.904800 +Ne,10,20.993847,0.002700 +Ne,10,21.991385,0.092500 +Na,11,22.989769,1.000000 +Mg,12,23.985042,0.789900 +Mg,12,24.985837,0.100000 +Mg,12,25.982593,0.110100 +Al,13,26.981539,1.000000 +Si,14,27.976927,0.922230 +Si,14,28.976495,0.046850 +Si,14,29.973770,0.030920 +P,15,30.973762,1.000000 +S,16,31.972071,0.949900 +S,16,32.971459,0.007500 +S,16,33.967867,0.042500 +S,16,35.967081,0.000100 +Cl,17,34.968853,0.757600 +Cl,17,36.965903,0.242400 +Ar,18,35.967545,0.003336 +Ar,18,37.962732,0.000629 +Ar,18,39.962383,0.996035 +K,19,38.963706,0.932581 +K,19,39.963998,0.000117 +K,19,40.961825,0.067302 +Ca,20,39.962591,0.969410 +Ca,20,41.958618,0.006470 +Ca,20,42.958766,0.001350 +Ca,20,43.955482,0.020860 +Ca,20,45.953689,0.000040 +Ca,20,47.952523,0.001870 +Sc,21,44.955908,1.000000 +Ti,22,45.952628,0.082500 +Ti,22,46.951759,0.074400 +Ti,22,47.947942,0.737200 +Ti,22,48.947866,0.054100 +Ti,22,49.944787,0.051800 +V,23,49.947156,0.002500 +V,23,50.943957,0.997500 +Cr,24,49.946042,0.043450 +Cr,24,51.940506,0.837890 +Cr,24,52.940648,0.095010 +Cr,24,53.938879,0.023650 +Mn,25,54.938044,1.000000 +Fe,26,53.939609,0.058450 +Fe,26,55.934936,0.917540 +Fe,26,56.935393,0.021190 +Fe,26,57.933274,0.002820 +Co,27,58.933194,1.000000 +Ni,28,57.935342,0.680770 +Ni,28,59.930786,0.262230 +Ni,28,60.931056,0.011399 +Ni,28,61.928345,0.036346 +Ni,28,63.927967,0.009255 +Cu,29,62.929598,0.691500 
+Cu,29,64.927790,0.308500 +Zn,30,63.929142,0.491700 +Zn,30,65.926034,0.277300 +Zn,30,66.927128,0.040400 +Zn,30,67.924845,0.184500 +Zn,30,69.925319,0.006100 +Ga,31,68.925573,0.601080 +Ga,31,70.924703,0.398920 +Ge,32,69.924249,0.205700 +Ge,32,71.922076,0.274500 +Ge,32,72.923459,0.077500 +Ge,32,73.921178,0.365000 +Ge,32,75.921403,0.077300 +As,33,74.921595,1.000000 +Se,34,73.922476,0.008900 +Se,34,75.919214,0.093700 +Se,34,76.919914,0.076300 +Se,34,77.917309,0.237700 +Se,34,79.916522,0.496100 +Se,34,81.916699,0.087300 +Br,35,78.918338,0.506900 +Br,35,80.916290,0.493100 +Kr,36,77.920365,0.003550 +Kr,36,79.916378,0.022860 +Kr,36,81.913483,0.115930 +Kr,36,82.914127,0.115000 +Kr,36,83.911498,0.569870 +Kr,36,85.910611,0.172790 +Rb,37,84.911790,0.721700 +Rb,37,86.909181,0.278300 +Sr,38,83.913419,0.005600 +Sr,38,85.909261,0.098600 +Sr,38,86.908878,0.070000 +Sr,38,87.905613,0.825800 +Y,39,88.905840,1.000000 +Zr,40,89.904698,0.514500 +Zr,40,90.905640,0.112200 +Zr,40,91.905035,0.171500 +Zr,40,93.906311,0.173800 +Zr,40,95.908271,0.028000 +Nb,41,92.906373,1.000000 +Mo,42,91.906808,0.145300 +Mo,42,93.905085,0.091500 +Mo,42,94.905839,0.158400 +Mo,42,95.904676,0.166700 +Mo,42,96.906018,0.096000 +Mo,42,97.905405,0.243900 +Mo,42,99.907472,0.098200 +Tc,43,96.906367,0.000000 +Tc,43,97.907212,0.000000 +Tc,43,98.906251,0.000000 +Ru,44,95.907590,0.055400 +Ru,44,97.905287,0.018700 +Ru,44,98.905934,0.127600 +Ru,44,99.904214,0.126000 +Ru,44,100.905577,0.170600 +Ru,44,101.904344,0.315500 +Ru,44,103.905428,0.186200 +Rh,45,102.905498,1.000000 +Pd,46,101.905602,0.010200 +Pd,46,103.904031,0.111400 +Pd,46,104.905080,0.223300 +Pd,46,105.903480,0.273300 +Pd,46,107.903892,0.264600 +Pd,46,109.905172,0.117200 +Ag,47,106.905092,0.518390 +Ag,47,108.904755,0.481610 +Cd,48,105.906460,0.012500 +Cd,48,107.904183,0.008900 +Cd,48,109.903007,0.124900 +Cd,48,110.904183,0.128000 +Cd,48,111.902763,0.241300 +Cd,48,112.904408,0.122200 +Cd,48,113.903365,0.287300 +Cd,48,115.904763,0.074900 +In,49,112.904062,0.042900 
+In,49,114.903879,0.957100 +Sn,50,111.904824,0.009700 +Sn,50,113.902783,0.006600 +Sn,50,114.903345,0.003400 +Sn,50,115.901743,0.145400 +Sn,50,116.902954,0.076800 +Sn,50,117.901607,0.242200 +Sn,50,118.903311,0.085900 +Sn,50,119.902202,0.325800 +Sn,50,121.903444,0.046300 +Sn,50,123.905277,0.057900 +Sb,51,120.903812,0.572100 +Sb,51,122.904213,0.427900 +Te,52,119.904059,0.000900 +Te,52,121.903043,0.025500 +Te,52,122.904270,0.008900 +Te,52,123.902817,0.047400 +Te,52,124.904430,0.070700 +Te,52,125.903311,0.188400 +Te,52,127.904461,0.317400 +Te,52,129.906223,0.340800 +I,53,126.904472,1.000000 +Xe,54,123.905892,0.000952 +Xe,54,125.904298,0.000890 +Xe,54,127.903531,0.019102 +Xe,54,128.904781,0.264006 +Xe,54,129.903509,0.040710 +Xe,54,130.905084,0.212324 +Xe,54,131.904155,0.269086 +Xe,54,133.905395,0.104357 +Xe,54,135.907214,0.088573 +Cs,55,132.905452,1.000000 +Ba,56,129.906321,0.001060 +Ba,56,131.905061,0.001010 +Ba,56,133.904508,0.024170 +Ba,56,134.905688,0.065920 +Ba,56,135.904576,0.078540 +Ba,56,136.905827,0.112320 +Ba,56,137.905247,0.716980 +La,57,137.907115,0.000888 +La,57,138.906356,0.999112 +Ce,58,135.907129,0.001850 +Ce,58,137.905991,0.002510 +Ce,58,139.905443,0.884500 +Ce,58,141.909250,0.111140 +Pr,59,140.907658,1.000000 +Nd,60,141.907729,0.271520 +Nd,60,142.909820,0.121740 +Nd,60,143.910093,0.237980 +Nd,60,144.912579,0.082930 +Nd,60,145.913123,0.171890 +Nd,60,147.916899,0.057560 +Nd,60,149.920902,0.056380 +Pm,61,144.912756,0.000000 +Pm,61,146.915145,0.000000 +Sm,62,143.912006,0.030700 +Sm,62,146.914904,0.149900 +Sm,62,147.914829,0.112400 +Sm,62,148.917192,0.138200 +Sm,62,149.917283,0.073800 +Sm,62,151.919740,0.267500 +Sm,62,153.922217,0.227500 +Eu,63,150.919858,0.478100 +Eu,63,152.921238,0.521900 +Gd,64,151.919800,0.002000 +Gd,64,153.920874,0.021800 +Gd,64,154.922630,0.148000 +Gd,64,155.922131,0.204700 +Gd,64,156.923969,0.156500 +Gd,64,157.924112,0.248400 +Gd,64,159.927062,0.218600 +Tb,65,158.925355,1.000000 +Dy,66,155.924285,0.000560 +Dy,66,157.924416,0.000950 
+Dy,66,159.925205,0.023290 +Dy,66,160.926941,0.188890 +Dy,66,161.926806,0.254750 +Dy,66,162.928738,0.248960 +Dy,66,163.929182,0.282600 +Ho,67,164.930329,1.000000 +Er,68,161.928788,0.001390 +Er,68,163.929209,0.016010 +Er,68,165.930299,0.335030 +Er,68,166.932055,0.228690 +Er,68,167.932377,0.269780 +Er,68,169.935470,0.149100 +Tm,69,168.934218,1.000000 +Yb,70,167.933890,0.001230 +Yb,70,169.934766,0.029820 +Yb,70,170.936330,0.140900 +Yb,70,171.936386,0.216800 +Yb,70,172.938215,0.161030 +Yb,70,173.938866,0.320260 +Yb,70,175.942576,0.129960 +Lu,71,174.940775,0.974010 +Lu,71,175.942690,0.025990 +Hf,72,173.940046,0.001600 +Hf,72,175.941408,0.052600 +Hf,72,176.943228,0.186000 +Hf,72,177.943706,0.272800 +Hf,72,178.945823,0.136200 +Hf,72,179.946557,0.350800 +Ta,73,179.947465,0.000120 +Ta,73,180.947996,0.999880 +W,74,179.946711,0.001200 +W,74,181.948204,0.265000 +W,74,182.950223,0.143100 +W,74,183.950931,0.306400 +W,74,185.954363,0.284300 +Re,75,184.952955,0.374000 +Re,75,186.955750,0.626000 +Os,76,183.952488,0.000200 +Os,76,185.953835,0.015900 +Os,76,186.955747,0.019600 +Os,76,187.955835,0.132400 +Os,76,188.958144,0.161500 +Os,76,189.958444,0.262600 +Os,76,191.961477,0.407800 +Ir,77,190.960589,0.373000 +Ir,77,192.962922,0.627000 +Pt,78,189.959930,0.000120 +Pt,78,191.961039,0.007820 +Pt,78,193.962681,0.328600 +Pt,78,194.964792,0.337800 +Pt,78,195.964952,0.252100 +Pt,78,197.967895,0.073560 +Au,79,196.966569,1.000000 +Hg,80,195.965833,0.001500 +Hg,80,197.966769,0.099700 +Hg,80,198.968281,0.168700 +Hg,80,199.968327,0.231000 +Hg,80,200.970303,0.131800 +Hg,80,201.970643,0.298600 +Hg,80,203.973494,0.068700 +Tl,81,202.972345,0.295200 +Tl,81,204.974428,0.704800 +Pb,82,203.973044,0.014000 +Pb,82,205.974466,0.241000 +Pb,82,206.975897,0.221000 +Pb,82,207.976653,0.524000 +Bi,83,208.980399,1.000000 +Po,84,208.982431,0.000000 +Po,84,209.982874,0.000000 +At,85,209.987148,0.000000 +At,85,210.987497,0.000000 +Rn,86,210.990601,0.000000 +Rn,86,220.011394,0.000000 +Rn,86,222.017578,0.000000 
+Fr,87,223.019736,0.000000 +Ra,88,223.018502,0.000000 +Ra,88,224.020212,0.000000 +Ra,88,226.025410,0.000000 +Ra,88,228.031071,0.000000 +Ac,89,227.027752,0.000000 +Th,90,230.033134,0.000000 +Th,90,232.038056,1.000000 +Pa,91,231.035884,1.000000 +U,92,233.039636,0.000000 +U,92,234.040952,0.000054 +U,92,235.043930,0.007204 +U,92,236.045568,0.000000 +U,92,238.050788,0.992742 +Np,93,236.046570,0.000000 +Np,93,237.048174,0.000000 +Pu,94,238.049560,0.000000 +Pu,94,239.052164,0.000000 +Pu,94,240.053814,0.000000 +Pu,94,241.056852,0.000000 +Pu,94,242.058743,0.000000 +Pu,94,244.064205,0.000000 +Am,95,241.056829,0.000000 +Am,95,243.061381,0.000000 +Cm,96,243.061389,0.000000 +Cm,96,244.062753,0.000000 +Cm,96,245.065492,0.000000 +Cm,96,246.067224,0.000000 +Cm,96,247.070354,0.000000 +Cm,96,248.072350,0.000000 +Bk,97,247.070307,0.000000 +Bk,97,249.074988,0.000000 +Cf,98,249.074854,0.000000 +Cf,98,250.076406,0.000000 +Cf,98,251.079589,0.000000 +Cf,98,252.081627,0.000000 +Es,99,252.082980,0.000000 +Fm,100,257.095106,0.000000 +Md,101,258.098432,0.000000 +Md,101,260.103650,0.000000 +No,102,259.101030,0.000000 +Lr,103,262.109610,0.000000 +Rf,104,267.121790,0.000000 +Db,105,268.125670,0.000000 +Sg,106,271.133930,0.000000 +Bh,107,272.138260,0.000000 +Hs,108,270.134290,0.000000 +Mt,109,276.151590,0.000000 +Ds,110,281.164510,0.000000 +Rg,111,280.165140,0.000000 +Cn,112,285.177120,0.000000 +Nh,113,284.178730,0.000000 +Fl,114,289.190420,0.000000 +Mc,115,288.192740,0.000000 +Lv,116,293.204490,0.000000 +Ts,117,292.207460,0.000000 +Og,118,294.213920,0.000000 diff --git a/build/lib/cctk/data/vdw_radii.csv b/build/lib/cctk/data/vdw_radii.csv new file mode 100644 index 0000000..144f248 --- /dev/null +++ b/build/lib/cctk/data/vdw_radii.csv @@ -0,0 +1,76 @@ +1, 1.17 +2, 1.4 +3, 1.81 +4, 1.53 +5, 1.92 +6, 1.75 +7, 1.55 +8, 1.40 +9, 1.30 +10, 1.54 +11, 2.27 +12, 1.73 +13, 1.84 +14, 2.10 +15, 1.80 +16, 1.80 +17, 1.77 +18, 1.88 +19, 2.75 +20, 2.31 +21, 2.30 +22, 2.15 +23, 2.05 +24, 2.05 +25, 2.05 +26, 
import numpy as np
from copy import deepcopy

import cctk
from cctk.helper_functions import align_matrices


class Ensemble:
    """
    Class representing a collection of molecules. They do not all need to have the same atoms or bonds.

    Ensembles are composed of molecules and properties. Molecules are ``Molecule`` objects, whereas properties are ``dict`` objects containing calculation-specific information.

    There are various shortcuts for handling ``Ensemble`` objects:

    - ``ensemble[molecule]`` or ``ensemble[0]`` will return new ``Ensemble`` objects with only the specified molecules.
        Lists or slices can also be used: so ``ensemble[0:10:2]`` or ``ensemble[[molecule1, molecule2, molecule3]]`` will also return new ``Ensemble`` objects.
    - Individual properties can be read through tuple indexing: ``ensemble[0,"energy"]`` will return the energy of the first molecule,
        while ``ensemble[:,"energy"]`` will return a list of all the energies.
    - To access ``Molecule`` objects, use ``ensemble.molecules``: ``ensemble.molecules[0]`` will return the first object, whereas ``ensemble.molecules[1:3]`` will return a list.
    - ``ensemble.items()`` will return a view of (molecule, property) pairs.
    - ``ensemble.molecule_list()`` and ``ensemble.properties_list()`` return lists of molecules and properties, respectively.

    Attributes:
        name (str): name, for identification
        _items (dict): keys: ``Molecule`` objects; values: dictionaries containing properties from each molecule, variable. should always be one layer deep.
        molecules (``_MoleculeIndexer``): special object that accesses the keys
    """

    def __init__(self, name=None):
        """
        Create new instance.

        Args:
            name (str): name of Ensemble
        """
        self.name = name
        self._items = {}
        self.molecules = self._MoleculeIndexer(self)

    def __str__(self):
        name = "None" if self.name is None else self.name
        return f"Ensemble (name={name}, {len(self._items)} molecules)"

    def __getitem__(self, key):
        """
        Select molecules (returning a new ensemble) or read a property.

        - ``int`` / ``Molecule``: new ensemble of the same type holding that one molecule.
        - ``list`` / ``np.ndarray`` / ``slice``: new ensemble built with ``join_ensembles``.
        - ``tuple`` ``(idx, prop)``: delegates to ``get_property(idx, prop)``.
        - ``None``: returns this ensemble itself.

        Raises:
            KeyError: ``key`` is of an unsupported type.
        """
        if isinstance(key, (int, np.integer)):
            mol = self.molecule_list()[key]
            prop = self.properties_list()[key]
            new = type(self)(name=self.name)  # will return either Ensemble or subclass thereof
            new.add_molecule(mol, properties=prop)
            return new
        elif isinstance(key, cctk.Molecule):
            idx = self.molecule_list().index(key)
            return self[idx]
        elif isinstance(key, (list, np.ndarray)):
            new_list = [self[k] for k in key]
            return self.join_ensembles(new_list, name=self.name)
        elif isinstance(key, slice):
            start, stop, step = key.indices(len(self))
            return self[list(range(start, stop, step))]
        elif isinstance(key, tuple):
            return self.get_property(key[0], key[1])
        elif key is None:
            return self
        else:
            raise KeyError(f"not a valid datatype for Ensemble key: {type(key)}")

    def __setitem__(self, key, item):
        """
        Set a property: ``ensemble[idx, name] = item``.

        ``idx`` may be an int, a ``Molecule``, a slice, or a list/array of those.
        When both ``idx`` and ``item`` are lists/arrays they are paired element-wise;
        otherwise the same ``item`` is stored for every selected molecule.
        ``name`` may be a list of names only when ``idx`` resolves to single molecules,
        in which case ``item`` is stored under each name.
        """
        assert isinstance(key, tuple), "need two indexes to set a value in an ensemble!"
        idx = key[0]
        name = key[1]

        if isinstance(idx, slice):
            start, stop, step = idx.indices(len(self))
            self[list(range(start, stop, step)), name] = item
        elif isinstance(idx, (list, np.ndarray)) and isinstance(item, (list, np.ndarray)):
            # report the number of targets (len(idx)), not the length of the (idx, name) tuple
            assert len(idx) == len(item), f"can't set {len(item)} items into {len(idx)} variables (cf. pigeonhole principle)"
            for (k, i) in zip(idx, item):
                self[k, name] = i
        elif isinstance(idx, (list, np.ndarray)):
            for k in idx:
                self[k, name] = item
        elif isinstance(idx, (int, np.integer)):
            mol = self.molecule_list()[idx]
            self[mol, name] = item
        elif isinstance(idx, cctk.Molecule):
            if isinstance(name, (list, np.ndarray)):
                for n in name:
                    self[idx, n] = item
                #### we can't assign multiple items to a list of names since that would preclude assigning a list to a single variable
            else:
                self._items[idx][name] = item
        else:
            raise KeyError(f"not a valid datatype for Ensemble index: {type(idx)}")

    def __len__(self):
        return len(self._items)

    def __iter__(self):
        return iter(self.items())

    def keys(self):
        """Returns a view of the constituent molecules."""
        return self._items.keys()

    def values(self):
        """Returns a view of the property dictionaries."""
        return self._items.values()

    def molecule_list(self):
        """
        Returns a list of the constituent molecules.
        """
        return list(self.keys())

    def properties_list(self):
        """
        Returns a list of the property dictionaries, in the same order as ``molecule_list()``.
        """
        return list(self.values())

    def has_property(self, idx, prop):
        """
        Returns ``True`` if property is defined for index ``idx`` and ``False`` otherwise.

        ``idx`` accepts anything ``ensemble[idx]`` accepts for selecting molecules.
        If several molecules are selected, every one of them must define ``prop``;
        an empty selection yields ``False``.
        """
        selected = self[idx]
        if len(selected) == 0:
            return False
        return all(prop in properties for properties in selected.values())

    def combined_properties(self):
        """
        Returns a dictionary containing the most up-to-date version of each property.

        Property dictionaries are merged in ensemble order, so later molecules override earlier ones.
        """
        combined = dict()
        for p in self.properties_list():
            combined = {**combined, **p}
        return combined

    def get_property(self, idx, prop):
        """
        Looks up one or more properties for the molecules selected by ``idx``.

        Args:
            idx: anything ``ensemble[idx]`` accepts for selecting molecules
            prop (str or list): property name, or list of property names

        Returns:
            For each selected molecule the value of ``prop`` (``None`` if missing), or,
            when ``prop`` is a list, a list of values in the same order.
            If exactly one molecule is selected its entry is returned directly;
            otherwise a list of entries is returned, or ``None`` if every entry is ``None``.
        """
        ensemble = self[idx]
        result = []
        for _, properties in ensemble.items():
            if isinstance(prop, list):
                result.append([properties.get(x) for x in prop])
            else:
                result.append(properties.get(prop))

        if len(ensemble) == 1:
            return result[0]
        if any(entry is not None for entry in result):
            return result
        return None

    def get_properties_dict(self, idx):
        """
        Returns the dictionary of molecule properties for the specified molecule.

        Args:
            idx (int or cctk.Molecule): a molecule belonging to this ensemble, either
                                        0-indexed or given explicitly as a Molecule

        Returns:
            the property dict corresponding to this Molecule
        """
        assert isinstance(idx, (int, np.integer, cctk.Molecule)), "index must be int or Molecule"
        ensemble = self[idx]
        assert len(ensemble) == 1, "idx returned too many ensembles"
        return ensemble.properties_list()[0]

    def items(self):
        """
        Returns a view of (molecule, properties) pairs.
        """
        return self._items.items()

    # object to allow convenient indexing of the molecules in the ensemble
    #
    # allowed use cases
    #
    # retrieving molecules:
    # ensemble.molecules[0]: first molecule
    # ensemble.molecules[-1]: last molecule
    # ensemble.molecules[[0,1]]: first two molecules as a list
    # ensemble.molecules[0:4:2]: first and third molecules as a list
    #
    # setting molecule properties this way is not allowed
    class _MoleculeIndexer():
        def __init__(self, ensemble):
            self.ensemble = ensemble

        def __getitem__(self, key):
            items_list = list(self.ensemble._items.keys())
            n_items = len(items_list)
            if isinstance(key, (int, np.integer)):
                self._check_key(key, n_items)
                return items_list[key]
            if isinstance(key, np.ndarray):
                assert len(np.shape(key)) == 1, f"multidimensional keys not allowed, shape was {np.shape(key)}"
            if isinstance(key, (list, np.ndarray)):
                return_list = []
                for k in key:
                    assert isinstance(k, (int, np.integer)), f"key {k} in {str(key)} is not an integer, type is {str(type(k))}"
                    self._check_key(k, n_items)
                    return_list.append(items_list[k])
                return return_list
            elif isinstance(key, slice):
                start, stop, step = key.indices(n_items)
                return [items_list[i] for i in range(start, stop, step)]
            else:
                raise ValueError(f"cannot index with type {str(type(key))}")

        def __setitem__(self, key, item):
            raise LookupError("cannot set molecule properties this way; use ensemble.set_property_dict(molecule, property_dict) instead")

        def _check_key(self, key, n_items):
            assert -n_items <= key < n_items, f"key {key} is out of range...must be between {-n_items} and {n_items-1} inclusive"

        def __iter__(self):
            return iter(self.ensemble.molecule_list())

    def properties(self, num=None):
        """
        Returns a list of the constituent properties, or only the ``num``-th property dictionary if ``num`` is given.
        """
        if num is None:
            return list(self.values())
        else:
            assert isinstance(num, int), "num must be integer"
            return list(self.values())[num]

    def sort_by(self, property_name, ascending=True):
        """
        Sorts the ensemble by the specified property.
        Throws an error if the property is missing for any entries.
        Consistent, sort-compatible property values are assumed and not checked.

        Args:
            property_name (str): the name of the property to sort on (must be a string or number)
            ascending (bool): whether the property should increase or decrease in value
        Returns:
            new Ensemble (current ensemble is not modified)
        """
        property_list = self[:, property_name]
        if property_list is None:
            raise ValueError(f"property '{property_name}' not found in ensemble")
        property_list = np.asarray(property_list)
        # elementwise comparison against None is intentional here (counts missing entries)
        n_missing_entries = np.count_nonzero(property_list == None)  # noqa: E711
        if n_missing_entries > 0:
            error = "---sorting error---\n"
            error += str(property_list)
            raise ValueError(f"{error}\nproperty '{property_name}' has {n_missing_entries} missing entries and cannot be sorted")
        new_indices = np.argsort(property_list)
        if not ascending:
            new_indices = np.flip(new_indices)
        return self[new_indices]

    def add_molecule(self, molecule, properties=None, copy=False):
        """
        Adds a molecule to the ensemble.

        Args:
            molecule (Molecule): the molecule to be added
            properties (dict): property name (str) to property value
            copy (bool): whether to store an independent copy of the molecule

        Raises:
            TypeError: ``molecule`` is not a ``cctk.Molecule``.
        """
        if not isinstance(molecule, cctk.Molecule):
            raise TypeError("molecule is not a Molecule - so it can't be added!")

        if copy:
            molecule = deepcopy(molecule)

        if properties is None:
            # a dict display always creates a new, independent dictionary
            properties = {}

        assert isinstance(properties, dict), f"properties must be a dict and not type {type(properties)}"

        self._items[molecule] = properties

    def _check_molecule_number(self, number):
        """
        Helper method which performs quick checks on the validity of a given molecule number.

        Raises:
            TypeError: ``number`` cannot be converted to ``int``.
            ValueError: ``number`` is not smaller than the number of molecules.
        """
        try:
            number = int(number)
        except (TypeError, ValueError) as err:
            raise TypeError(f"atom number {number} must be integer") from err

        if number >= len(self._items):
            raise ValueError(f"atom number {number} too large!")

    @classmethod
    def join_ensembles(cls, ensembles, name=None):
        """
        Creates a new ensemble of type ``cls`` from existing ensembles.

        Molecules and their property dictionaries are shared with the inputs, not copied.
        If the same molecule occurs more than once, the properties from the last occurrence win.

        Args:
            name (str): name of Ensemble created
            ensembles (list of Ensembles): Ensemble objects to join
        """
        new_ensemble = cls(name=name)
        for ensemble in ensembles:
            assert isinstance(ensemble, Ensemble), "can't join an object that isn't an Ensemble!"

        for ensemble in ensembles:
            new_ensemble._items.update(ensemble.items())

        return new_ensemble

    def lowest_molecules(self, property_name, num=1):
        """
        Retrieves the molecules with the lowest values of the specified property.

        Args:
            property_name (str): the name of the property to sort on
            num (int): how many molecules to return
        Returns:
            lowest ``Molecule`` (if num==1)
            ``list`` of ``Molecule`` (otherwise)
        """
        assert isinstance(num, (int, np.integer)), f"num must be an integer, got {type(num)}"
        assert num > 0, f"num must be > 0, got {num}"
        sorted_ensemble = self.sort_by(property_name)
        if num > 1:
            return sorted_ensemble.molecules[0:num]
        return sorted_ensemble.molecules[0]
+ """ + if len(self._items) > 0: + initial_mol = self.molecule_list()[0] + if molecule.num_atoms() != initial_mol.num_atoms(): + raise ValueError("wrong number of atoms for this ensemble") + + if molecule.charge != initial_mol.charge: + raise ValueError("wrong charge for this ensemble") + + if molecule.multiplicity != initial_mol.multiplicity: + raise ValueError("wrong spin multiplicity for this ensemble") + + if checks and not np.array_equal(molecule.atomic_numbers, initial_mol.atomic_numbers): + raise ValueError("wrong atom types for this ensemble") + + #### only save one copy to save space + molecule.bonds = initial_mol.bonds + molecule.atomic_numbers = initial_mol.atomic_numbers + + super().add_molecule(molecule, properties, copy) + + @classmethod + def join_ensembles(cls, ensembles, name=None, copy=False): + """ + Creates a new ConformationalEnsemble object from existing ensembles. + Both molecules and properties are copied. + + Args: + name (str): name of ConformationalEnsemble created + ensembles (list of ConformationalEnsembles): ConformationalEnsemble objects to join + copy (bool): whether to make copies of the component molecules + """ + new_ensemble = ConformationalEnsemble(name=name) + for ensemble in ensembles: + assert isinstance(ensemble, ConformationalEnsemble), "can't join an object that isn't a ConformationalEnsemble!" + + for ensemble in ensembles: + for mol, prop in ensemble.items(): + new_ensemble.add_molecule(mol, prop, copy) + + return new_ensemble + + def align(self, to_geometry=0, comparison_atoms="heavy", compute_RMSD=False): + """ + Aligns every geometry in this ensemble to the specified geometry, + optionally computing the root-mean-square distance between each + geometry and the reference geometry. + + Alignments are based on `atom_numbers`. + The current ensemble will not be altered. RMSDs will be calculated over the + comparison atoms only. 
+ + Args: + to_geometry (int): the reference geometry to align to (0-indexed) + comparison_atoms (str or list): which atoms to use when computing alignments + "heavy" for all non-hydrogen atoms, + "all" for all atoms, or + a list of 1-indexed atom numbers + compute_RMSD (Bool): whether to return RMSD before and after rotation + + Returns: + new aligned ``ConformationalEnsemble`` or + new aligned ``ConformationalEnsemble``, before_RMSD array, after_RMSD array + """ + # check inputs + self._check_molecule_number(to_geometry) + n_atoms = self.molecules[0].num_atoms() + + if isinstance(comparison_atoms, str): + if comparison_atoms == "all": + comparison_atoms = np.arange(1, n_atoms + 1) + elif comparison_atoms == "heavy": + comparison_atoms = self.molecules[0].get_heavy_atoms() + assert isinstance(comparison_atoms, (list, np.ndarray, cctk.OneIndexedArray)), f"unexpected type for comparison_atoms: {str(type(comparison_atoms))}" + for a in comparison_atoms: + assert 1 <= a <= n_atoms, f"atom number out of range: got {a}, but must be between 1 and {n_atoms}" + + assert len(comparison_atoms) >= 3, f"need at least 3 atoms for alignment, but only got {len(comparison_atoms)}" + + # duplicate the ensemble + new_ensemble = deepcopy(self) + + # translate all molecules to the origin + # with respect to the comparison atoms + for molecule, _ in new_ensemble: + full_geometry = molecule.geometry + partial_geometry = full_geometry[comparison_atoms] + translation_vector = -partial_geometry.mean(axis=0) + molecule.translate_molecule(translation_vector) + + full_template_geometry = new_ensemble.molecules[to_geometry].geometry + partial_template_geometry = full_template_geometry[comparison_atoms] + before_RMSDs = [] + after_RMSDs = [] + + # perform alignment using Kabsch algorithm + for i, (molecule, _) in enumerate(new_ensemble): + full_geometry = molecule.geometry + partial_geometry = full_geometry[comparison_atoms] + if compute_RMSD: + before_RMSD = 
    def eliminate_redundant(self, RMSD_cutoff=0.5, comparison_atoms="heavy", return_RMSD=False):
        """
        Aligns every geometry in this ensemble and then creates a new ensemble that contains only the non-redundant conformers.
        If every molecule has an ``energy`` property, candidates are considered in order of increasing energy,
        so the lowest energy conformer is kept for every redundancy and the result is sorted by energy.
        Otherwise the original order is used. The current ensemble will not be modified.

        Alignment is always performed against the first geometry (``to_geometry=0``).

        Args:
            RMSD_cutoff (float): remove conformers that are more similar than this threshold
            comparison_atoms (str or list): which atoms to use when computing alignments
                "heavy" for all non-hydrogen atoms,
                "all" for all atoms, or
                a list of 1-indexed atom numbers
            return_RMSD (bool): whether or not to return list of RMSD values

        Returns:
            new ``ConformationalEnsemble`` or
            new ``ConformationalEnsemble``, list of RMSDs (one per kept conformer: the last RMSD
            computed against the previously kept conformers, or 0 for the first conformer kept)
        """
        # check inputs
        n_atoms = self.molecules[0].num_atoms()
        if isinstance(comparison_atoms, str):
            if comparison_atoms == "all":
                comparison_atoms = np.arange(1, n_atoms + 1)
            elif comparison_atoms == "heavy":
                comparison_atoms = self.molecules[0].get_heavy_atoms()

        # any other string falls through unchanged and is rejected by the type assertion below
        assert isinstance(comparison_atoms, (list, np.ndarray, cctk.OneIndexedArray)), f"unexpected type for comparison_atoms: {str(type(comparison_atoms))}"
        for a in comparison_atoms:
            assert 1 <= a <= n_atoms, f"atom number out of range: got {a}, but must be between 1 and {n_atoms}"
        assert len(comparison_atoms) >= 3, f"need at least 3 atoms for alignment, but only got {len(comparison_atoms)}"

        assert isinstance(RMSD_cutoff, (float, int)), f"RMSD cutoff must be a float but got {str(type(RMSD_cutoff))}"
        assert RMSD_cutoff > 0.0001, "must use a big enough RMSD cutoff"

        # align all molecules
        old_ensemble = self.align(to_geometry=0, comparison_atoms=comparison_atoms, compute_RMSD=False)

        # sort molecules by energy if available
        energies_available = True
        for molecule,properties in old_ensemble.items():
            if "energy" not in properties:
                energies_available = False
                break

        n_molecules = len(old_ensemble)
        sorted_indices = list(range(n_molecules))
        if energies_available:
            energies = old_ensemble[:,"energy"]
            sorted_indices = list(np.argsort(energies))

        # boolean indexing noticeably faster
        idxs = np.array(comparison_atoms)
        mask = np.zeros(old_ensemble.molecules[0].geometry.shape[0], dtype=bool)
        # comparison atoms are 1-indexed, the mask is 0-indexed
        mask[idxs - 1] = True

        partial_geoms = [m.geometry[mask] for m in old_ensemble.molecules]
        # comparison-atom geometries of the conformers accepted so far
        new_partial_geoms = []

        rmsds = list()

        # add molecules one by one
        new_ensemble = ConformationalEnsemble()
        for i in sorted_indices:
            ok_to_add = True

            # stays 0 for the first accepted conformer, since there is nothing to compare against
            candidate_rmsd = 0
            for existing_molecule in new_partial_geoms:
                candidate_rmsd = cctk.helper_functions.compute_RMSD(partial_geoms[i], existing_molecule, checks=False)
                if candidate_rmsd < RMSD_cutoff:
                    # too close to a conformer that was already kept
                    ok_to_add = False
                    break

            if ok_to_add:
                candidate_molecule = old_ensemble.molecules[i]
                candidate_molecule_properties = old_ensemble.get_properties_dict(candidate_molecule)

                new_ensemble.add_molecule(candidate_molecule, candidate_molecule_properties)
                new_partial_geoms.append(candidate_molecule.geometry[mask])
                rmsds.append(candidate_rmsd)

        if return_RMSD:
            return new_ensemble, rmsds
        else:
            return new_ensemble
and atom4 to calculate dihedral!") + output[index] = molecule.get_dihedral(atom1, atom2, atom3, atom4) + else: + raise ValueError(f"Invalid parameter {parameter}!") + + return output + + def assign_connectivity(self, index=0): + """ + Assigns connectivity for all molecules based on molecule of index ``index``. Much faster than assigning connectivity for each individually -- but assumes all bonding is the same. + """ + assert isinstance(index, int), "Need integer index" + bonds = self.molecules[index].assign_connectivity().bonds + + for mol in self.molecules: + mol.bonds = bonds + + return self + + def boltzmann_average(self, which, energies=None, temp=298, energy_unit="hartree", return_weights=False): + """ + Computes the Boltzmann-weighted average of a property over the whole ensemble. + + Args: + which (str): which property to compute + energy (np.ndarray): list of energies to use for weighting. + Will default to ``self[:,"energy"]``, although other strings can be passed as well as shorthand for ``self[:,energy]``. + temp (float): temperature for Boltzmann-weighting, in K + energy_unit (str): either ``kcal_mol`` or ``hartree`` + return_weights (bool): whether to return a list of weights too + + Returns: + weighted property, of the same shape as the individual property + """ + if energies is None: + energies = self[:,"energy"] + elif isinstance(energies, str): + energies = self[:,energies] + elif isinstance(energies, (list, np.ndarray, cctk.OneIndexedArray)): + pass + else: + raise ValueError(f"invalid energy value {energies} (type {type(energies)})") + + for i, (m, pd) in enumerate(self.items()): + assert which in pd, f"molecule #{i} doesn't have property {which} defined!" 
+ + values = np.array(self[:,which], dtype=np.float64) + energies = np.array(energies, dtype=np.float64) + + assert len(energies) == len(self) + assert len(values) == len(self) + assert all([e is not None for e in energies]), "energy not defined for all molecules" + assert all([v is not None for v in values]), f"property {which} not defined for all molecules" + + # perhaps at some point we will need a real unit system like simtk/OpenMM, but not today! + if energy_unit == "kcal_mol": + energies = energies / 627.509 + energies = energies - np.min(energies) + + R = 3.1668105e-6 # eH/K + + weights = np.exp(-1*energies/(R*temp)) + weights = weights / np.sum(weights) + + try: + weighted_value = np.average(values, weights=weights) + except Exception as e: + raise ValueError(f"error computing Boltzmann average: {e}") + + if return_weights: + return weighted_value, weights + else: + return weighted_value diff --git a/build/lib/cctk/file.py b/build/lib/cctk/file.py new file mode 100644 index 0000000..ac12e55 --- /dev/null +++ b/build/lib/cctk/file.py @@ -0,0 +1,81 @@ +import os +from abc import ABC, abstractmethod + + +class File(ABC): + """ + Abstract class representing text files. + """ + + @abstractmethod + def __init__(self): + pass + + @staticmethod + def write_file(filename, text, overwrite_existing=True): + """ + Writes output text to a file. + + Args: + filename (str): path to file, including name (e.g. 
``path/to/input.gjf``) + text (str): desired contents of file + overwrite_existing (Bool): whether any existing files should be overwritten or not + + Returns: + ``True`` if write succeeded, ``False`` otherwise + """ + if not isinstance(text, str): + raise TypeError("cannot write non-string to file!") + + if not overwrite_existing and os.path.exists(filename): + raise ValueError(f"{filename} already exists but not allowed to overwrite") + else: + try: + with open(filename, "w+") as output_file: + output_file.write(text) + return True + except OSError as e: + print(e) + return False + + @staticmethod + def append_to_file(filename, text): + """ + Appends output text to a file. + + Args: + filename (str): path to file, including name (e.g. ``path/to/input.gjf``) + text (str): desired contents of file + + Returns: + ``True`` if write succeeded, ``False`` otherwise + """ + if not isinstance(text, str): + raise TypeError("cannot write non-string to file!") + + if os.path.exists(filename): + try: + with open(filename, "a+") as output_file: + output_file.write(text) + return True + except OSError as e: + print(e) + return False + else: + raise ValueError(f"{filename} does not exist") + + @staticmethod + def read_file(filename, lazy=False): + """ + Reads a file and parses into lines. + + Args: + filename (str): The path to the file. + + Returns: + A list containing all the lines in the file. 
#### This static variable tells what properties are expected from each JobType.
#### Keys are the ``GaussianJobType`` values; every job type needs an entry, because
#### ``check_has_properties`` looks the job type's value up in this dict.
#### An empty list means that no particular property is required.
EXPECTED_PROPERTIES = {
    "sp": ["energy", "scf_iterations",],
    "opt": ["rms_displacement", "rms_force",],
    "freq": ["gibbs_free_energy", "enthalpy", "frequencies",],
    "irc": [],
    "nmr": ["isotropic_shielding",],
    "pop": [],
    "force": ["forces",],
}
{"mem": "32GB", "nprocshared": 16}) + footer (str): optional, footer of .gjf file + successful_terminations (int): number of successful terminations (should be 1 for an opt, 2 for opt and then freq, 1 for a single point energy, etc) + elapsed_time (float): total time for job in seconds + title (str): optional, title of .gjf file + """ + + def __init__( + self, job_types=None, route_card=None, link0=None, footer=None, title="title", success=0, elapsed_time=0.0, molecule=None, + ): + """ + Create new GaussianFile object. + + Args: + job_types (list): list of ``job_type`` instances + route_card (str): optional, route card of ``.gjf`` file + link0 (dict): optional, Link 0 commands of ``.gjf`` file + footer (str): optional, footer of ``.gjf`` file + title (str): optional, title of ``.gjf`` file + success (int): num successful terminations + elapsed_time (float): total time for job in seconds + molecule (cctk.Molecule): molecule to initiate, if desired + """ + + if route_card and not isinstance(route_card, str): + raise TypeError("route card needs to be a string") + + if link0 and not isinstance(link0, dict): + raise TypeError("link0 needs to be a dict") + + if footer and not isinstance(footer, str): + raise TypeError("footer needs to be a string") + + if title and not isinstance(title, str): + raise TypeError("title needs to be a string") + + if success and not isinstance(success, int): + raise TypeError("success needs to be an integer") + + if not isinstance(elapsed_time, (float, int)) or elapsed_time < 0.0: + raise TypeError(f"elapsed_time invalid: {elapsed_time}") + + if job_types is not None: + if isinstance(job_types, str): + raise ValueError(f"invalid job_types {job_types} - did you mean to call GaussianFile.read_file({job_types})?") + if not all(isinstance(job, GaussianJobType) for job in job_types): + raise TypeError(f"invalid job_types {job_types}") + + self.ensemble = ConformationalEnsemble() + + if molecule is not None: + assert isinstance(molecule, 
Molecule), "molecule is not a valid cctk.Molecule!" + self.ensemble.add_molecule(molecule) + + self.route_card = route_card + self.link0 = link0 + self.footer = footer + self.title = title + self.job_types = job_types + self.successful_terminations = success + self.elapsed_time = elapsed_time + + def __str__(self): + return f"GaussianFile (title=\"{str(self.title)}\", {len(self.ensemble)} entries in Ensemble)" + + @classmethod + def write_molecule_to_file(cls, filename, molecule, route_card, link0={"mem": "32GB", "nprocshared": 16}, footer=None, title="title", append=False, print_symbol=False, point_charges=None): + """ + Write a ``.gjf`` file using the given molecule. + + Args: + filename (str): path to the new file + molecule (Molecule): which molecule to use -- a ``Molecule`` object. + route_card (str): route card for new file + link0 (dict): dictionary of Link 0 commands + footer (str): footer for new file + title (str): title of the file, defaults to "title" + append (Bool): whether or not to append to file using Link1 specifications + print_symbol (Bool): whether to print atomic symbols (instead of atomic numbers) + """ + if not isinstance(molecule, Molecule): + raise TypeError("need a valid molecule to write a file!") + + if (route_card is None) or (not isinstance(route_card, str)): + raise ValueError("can't write a file without a route card") + + if not re.match(r"^#p", route_card): + warnings.warn(f"route card doesn't start with #p: {route_card}") + + if point_charges is not None: + assert isinstance(point_charges, list), "point_charges must be list" + assert all([isinstance(pc, cctk.PointCharge) for pc in point_charges]), "point_charges must be list of point charges" + assert re.search(r"charge", route_card, flags=re.IGNORECASE), "charge must be in route_card if point_charges are present" + + #### generate the text + text = "" + if append: + text += "--Link1--\n" + + if isinstance(link0, dict): + for key, val in link0.items(): + text += f"%{key}={val}\n" 
+ + text += f"{route_card.strip()}\n\n{title}\n\n" + + text += f"{int(molecule.charge)} {int(molecule.multiplicity)}\n" + for index, Z in enumerate(molecule.atomic_numbers, start=1): + line = molecule.get_vector(index) + if print_symbol: + Z = get_symbol(Z) + text += f"{Z:>2} {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n" + else: + text += f"{Z:2d} {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n" + + text += "\n" + if footer is not None: + text += f"{footer.strip()}\n\n" + + if point_charges is not None: + for point_charge in point_charges: + text += f"{point_charge.coordinates[0]:>13.8f} {point_charge.coordinates[1]:>13.8f} {point_charge.coordinates[2]:>13.8f} {point_charge.charge:.5f}\n" + text += "\n" + + #### write the file + if append: + super().append_to_file(filename, text) + else: + super().write_file(filename, text) + + def write_file(self, filename, molecule=None, route_card=None, link0=None, footer=None, **kwargs): + """ + Write a ``.gjf`` file, using object attributes. If no header/footer is specified, the object's header/footer will be used. + + Args: + filename (str): path to the new file + molecule (int): which molecule to use -- passed to ``self.get_molecule()``. + Default is -1 (e.g. the last molecule), but positive integers will select from self.ensemble(1-indexed). + A ``Molecule`` object can also be passed, in which case that molecule will be written to the file. + route_card (str): route card for new file + link0 (dict): dictionary of Link 0 commands (e.g. {"mem": "32GB", "nprocshared": 16} + footer (str): footer for new file + """ + if not isinstance(molecule, Molecule): + molecule = self.get_molecule(molecule) + + if route_card is None: + route_card = self.route_card + + if link0 is None: + link0 = self.link0 + + if footer is None: + footer = self.footer + + self.write_molecule_to_file(filename, molecule, route_card, link0, footer, **kwargs) + + def num_imaginaries(self): + """ + Returns the number of imaginary frequencies. 
+ """ + return len(self.imaginaries()) + + def imaginaries(self): + """ + Returns the imaginary frequencies, rounded to the nearest integer. + """ + if (GaussianJobType.FREQ in self.job_types) and (self.ensemble[-1:,"frequencies"] is not None): + freqs = self.ensemble[-1:,"frequencies"] + if not isinstance(freqs, list) or len(freqs) == 0: + return list() + else: + return list(map(int, np.array(freqs)[np.array(freqs) < 0])) + else: + return list() + + @classmethod +# @profile + def read_file(cls, filename, return_lines=False, extended_opt_info=False): + """ + Reads a Gaussian``.out`` or ``.gjf`` file and populates the attributes accordingly. + Only footers from ``opt=modredundant`` can be read automatically -- ``genecep`` custom basis sets, &c must be specified manually. + + Note: + + Will throw ``ValueError`` if there have been no successful iterations. + + Args: + filename (str): path to the out file + return_lines (Bool): whether the lines of the file should be returned + extended_opt_info (Bool): if full parameters about each opt step should be collected + (by default, only ``rms_displacement`` and ``rms_force`` are collected) + Returns: + ``GaussianFile`` object (or list of ``GaussianFile`` objects for Link1 files) + (optional) the lines of the file (or list of lines of file for Link1 files) + """ + if re.search("gjf$", filename) or re.search("com$", filename): + return cls._read_gjf_file(filename, return_lines) + + link1_lines = parse.split_link1(filename) + files = [] + + for link1idx, lines in enumerate(link1_lines): + #### automatically assign job types based on header + header = lines.search_for_block("#p", "----", format_line=lambda x: x.lstrip(), join="") + if header is None: + raise ValueError("can't find route card! 
(perhaps '#p' wasn't employed?)") + job_types = cls._assign_job_types(header) + + link0 = parse.extract_link0(lines) + + title = "" + title_block = lines.search_for_block("l101.exe", "Symbolic Z-matrix", join="\n") + if title_block is not None: + for line in title_block.split("\n")[1:]: + if not re.search("-----", line): + title += line + + + (geometries, atom_list, energies, scf_iterations, success, elapsed_time) = parse.read_geometries_and_energies(lines) + success, elapsed_time = parse.extract_success_and_time(lines) + atomic_numbers = [] + + #### convert to right datatype + try: + atomic_numbers = np.array(atom_list, dtype=np.int8) + except Exception as e: + atomic_numbers = np.array(list(map(get_number, atom_list)), dtype=np.int8) + + footer = None + if re.search("modredundant", str(header)): + footer = lines.search_for_block("^ The following ModRedundant input section", "^ $", count=1, join="\n") + if footer is not None: + footer = "\n".join(list(footer.split("\n"))[1:]) # get rid of the first line + footer = "\n".join([" ".join(list(filter(None, line.split(" ")))) for line in footer.split("\n")]) + + bonds = parse.read_bonds(lines) + charge, multip = lines.find_parameter("Multiplicity", expected_length=4, which_field=[1,3], split_on="=")[0] + + f = GaussianFile(job_types=job_types, route_card=header, link0=link0, footer=footer, success=success, elapsed_time=elapsed_time, title=title) + + molecules = [None] * len(geometries) + properties = [{} for _ in range(len(geometries))] + for idx, geom in enumerate(geometries): + molecules[idx] = Molecule(atomic_numbers, geom, charge=charge, multiplicity=multip, bonds=bonds) + if idx < len(energies): + properties[idx]["energy"] = energies[idx] + if idx < len(scf_iterations): + properties[idx]["scf_iterations"] = scf_iterations[idx] + properties[idx]["link1_idx"] = link1idx + properties[idx]["filename"] = filename + properties[idx]["iteration"] = idx + + #### now for some job-type specific attributes + if 
GaussianJobType.OPT in job_types: + rms_forces = lines.find_parameter("RMS\s+Force", expected_length=5, which_field=2) + rms_displacements = lines.find_parameter("RMS\s+Displacement", expected_length=5, which_field=2) + + if extended_opt_info: + max_forces = lines.find_parameter("Maximum Force", expected_length=5, which_field=2) + max_displacements = lines.find_parameter("Maximum Displacement", expected_length=5, which_field=2) + max_gradients = lines.find_parameter("Cartesian Forces:", expected_length=6, which_field=3) + rms_gradients = lines.find_parameter("Cartesian Forces:", expected_length=6, which_field=5) + max_int_forces = lines.find_parameter("Internal Forces:", expected_length=6, which_field=3) + rms_int_forces = lines.find_parameter("Internal Forces:", expected_length=6, which_field=5) + delta_energy = lines.find_parameter("Predicted change in Energy", expected_length=4, which_field=3, cast_to_float=False) + + for idx, force in enumerate(rms_forces): + properties[idx]["rms_force"] = force + properties[idx]["rms_displacement"] = rms_displacements[idx] + + if extended_opt_info: + if idx < len(max_forces): + properties[idx]["max_force"] = max_forces[idx] + + if idx < len(max_displacements): + properties[idx]["max_displacement"] = max_displacements[idx] + + if idx < len(max_gradients): + properties[idx]["max_gradient"] = max_gradients[idx] + + if idx < len(rms_gradients): + properties[idx]["rms_gradient"] = rms_gradients[idx] + + if idx < len(max_int_forces): + properties[idx]["max_internal_force"] = max_int_forces[idx] + + if idx < len(rms_int_forces): + properties[idx]["rms_internal_force"] = rms_int_forces[idx] + + if idx < len(delta_energy): + change_in_energy = re.sub(r"Energy=", "", delta_energy[idx]) + properties[idx]["predicted_change_in_energy"] = float(change_in_energy.replace('D', 'E')) + + if GaussianJobType.FREQ in job_types: + enthalpies = lines.find_parameter("thermal Enthalpies", expected_length=7, which_field=6) + if len(enthalpies) == 1: + 
properties[-1]["enthalpy"] = enthalpies[0] + elif len(enthalpies) > 1: + raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}") + + gibbs_vals = lines.find_parameter("thermal Free Energies", expected_length=8, which_field=7) + if len(gibbs_vals) == 1: + properties[-1]["gibbs_free_energy"] = gibbs_vals[0] + elif len(gibbs_vals) > 1: + raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}") + + if GaussianJobType.FREQ in job_types: + enthalpies = lines.find_parameter("thermal Enthalpies", expected_length=7, which_field=6) + if len(enthalpies) == 1: + properties[-1]["enthalpy"] = enthalpies[0] + elif len(enthalpies) > 1: + raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}") + + gibbs_vals = lines.find_parameter("thermal Free Energies", expected_length=8, which_field=7) + if len(gibbs_vals) == 1: + properties[-1]["gibbs_free_energy"] = gibbs_vals[0] + elif len(gibbs_vals) > 1: + raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}") + + frequencies = [] + try: + frequencies = sum(lines.find_parameter("Frequencies", expected_length=5, which_field=[2,3,4]), []) + properties[-1]["frequencies"] = sorted(frequencies) + except Exception as e: + raise ValueError("error finding frequencies") + + # Temperature 298.150 Kelvin. Pressure 1.00000 Atm. 
+ temperature = lines.find_parameter("Temperature", expected_length=6, which_field=1) + if len(temperature) == 1: + properties[-1]["temperature"] = temperature[0] + try: + corrected_free_energy = get_corrected_free_energy(gibbs_vals[0], frequencies, frequency_cutoff=100.0, temperature=temperature[0]) + properties[-1]["quasiharmonic_gibbs_free_energy"] = float(f"{float(corrected_free_energy):.6f}") # yes this is dumb + except Exception as e: + pass + + + if GaussianJobType.NMR in job_types: + nmr_shifts = parse.read_nmr_shifts(lines, molecules[0].num_atoms()) + if nmr_shifts is not None: + properties[-1]["isotropic_shielding"] = nmr_shifts.view(OneIndexedArray) + + if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search("nmr=spinspin", f.route_card,flags=re.IGNORECASE): + couplings = parse.read_j_couplings(lines, molecules[0].num_atoms()) + if couplings is not None: + properties[-1]["j_couplings"] = couplings + + if GaussianJobType.FORCE in job_types: + assert len(molecules) == 1, "force jobs should not be combined with optimizations!" 
+ forces = parse.read_forces(lines) + properties[0]["forces"] = forces + + if GaussianJobType.POP in job_types: + if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card): + charges, spins = parse.read_hirshfeld_charges(lines) + properties[-1]["hirshfeld_charges"] = charges + properties[-1]["hirshfeld_spins"] = spins + + try: + charges = parse.read_mulliken_charges(lines) + properties[-1]["mulliken_charges"] = charges + except Exception as e: + pass + + try: + dipole = parse.read_dipole_moment(lines) + properties[-1]["dipole_moment"] = dipole + except Exception as e: + pass + + for mol, prop in zip(molecules, properties): + f.ensemble.add_molecule(mol, properties=prop) + + f.check_has_properties() + files.append(f) + + if return_lines: + if len(link1_lines) == 1: + return files[0], link1_lines[0] + else: + return files, link1_lines + else: + if len(link1_lines) == 1: + return files[0] + else: + return files + + @classmethod + def _read_gjf_file(cls, filename, return_lines=False): + """ + Reads a Gaussian ``.gjf`` or ``.com`` file and populates the attributes accordingly. 
+ + Args: + filename (str): path to the out file + return_lines (Bool): whether the lines of the file should be returned + Returns: + GaussianFile object + (optional) the lines of the file + """ + lines = super().read_file(filename) + header = None + link0 = {} + footer = None + header_done = False + title = None + charge = None + multip = None + in_geom = False + atomic_numbers = [] + geometry = [] + + for idx, line in enumerate(lines): + if header is None: + if re.match("\%", line): + pieces = line[1:].split("=") + link0[pieces[0]] = pieces[1] + continue + if re.match("#", line): + header = line + continue + + if (title is None) and (header is not None): + if header_done: + if len(line.strip()) > 0: + title = line + else: + if len(line.strip()) > 0: + header = header + line + else: + header_done = True + continue + + if (title is not None) and (charge is None): + if len(line.strip()) > 0: + pieces = list(filter(None, line.split(" "))) + assert len(pieces) == 2, f"can't parse line {line}" + + charge = int(pieces[0]) + multip = int(pieces[1]) + in_geom = True + continue + + if in_geom == True: + if len(line.strip()) == 0: + in_geom = False + else: + pieces = list(filter(None, line.split(" "))) + assert len(pieces) == 4, f"can't parse line {line}" + + atomic_numbers.append(pieces[0]) + geometry.append([pieces[1], pieces[2], pieces[3]]) + + if (in_geom == False) and (len(geometry) > 0): + if footer: + footer = footer + "\n" + line + else: + if len(line.strip()) > 0: + footer = line + + try: + atomic_numbers = np.array(atomic_numbers, dtype=np.int8) + except Exception as e: + atomic_numbers = np.array(list(map(get_number, atomic_numbers)), dtype=np.int8) + + job_types = cls._assign_job_types(header) + + f = GaussianFile(job_types=job_types, route_card=header, link0=link0, footer=footer, title=title) + f.ensemble.add_molecule(Molecule(atomic_numbers, geometry, charge=charge, multiplicity=multip)) + if return_lines: + return f, lines + else: + return f + + def 
@classmethod
def _assign_job_types(cls, header):
    """
    Builds the list of ``GaussianJobType`` members whose keyword appears in the route card.

    A member is selected when its value, preceded by a single space, is found anywhere in ``str(header)``
    (case-insensitive). ``GaussianJobType.SP`` is appended if it was not matched, so it is always present.

    For instance, "#p opt freq=noraman" selects the members for ``opt`` and ``freq`` and then adds ``GaussianJobType.SP``.

    Args:
        header (str): Gaussian route card

    Returns:
        list of ``GaussianJobType`` objects
    """
    route_text = str(header)
    matched = [
        member
        for member in GaussianJobType.__members__.values()
        if re.search(f" {member.value}", route_text, re.IGNORECASE)
    ]
    if GaussianJobType.SP not in matched:
        matched.append(GaussianJobType.SP)
    return matched
@classmethod
def write_ensemble_to_file(cls, filename, ensemble, route_card, link0={"mem": "32GB", "nprocshared": 16}, footer=None, title="title", print_symbol=False):
    """
    Write each structure in the specified ensemble to a single Gaussian input file
    by using the Link1 specification.

    Every per-link option may be given either once (applied to every structure) or as a list
    with one entry per structure. The first structure is written with ``append=False`` and
    every later one with ``append=True``.

    Args:
        filename (str): where to write the file
        ensemble (Ensemble): ``Ensemble`` object to write
        route_card (str or list): to use the same route card for every link, use a single string;
            otherwise, provide a list whose entries parallel the ensemble members
        link0 (dict or list of dicts): to use the same memory/processors for every link, use a single dict;
            otherwise, provide a list
        footer (None/str or list): use None for no text after geometry, provide a str to specify a footer,
            or provide some combination of the above as a list
        title (str or list): use a single string to provide a generic title for every link or a list as above
        print_symbol (bool or list): whether to print atomic symbols or atomic numbers in the geometry specification;
            use a single bool or a list as above

    Raises:
        AssertionError: if the ensemble is empty, a list has the wrong length or a wrongly-typed entry,
            or a title is blank
        ValueError: if an option is neither a single value nor a list
    """
    n_geometries = len(ensemble)
    assert n_geometries > 0, "cannot write a blank ensemble"

    if isinstance(route_card, str):
        route_card = [route_card for _ in ensemble._items]
    elif isinstance(route_card, list):
        assert len(route_card) == n_geometries, f"expected {n_geometries} route cards but got {len(route_card)}"
        for card in route_card:
            assert isinstance(card, str), "expected route card to be a str"
    else:
        raise ValueError(f"unexpected type for route_card: {str(type(route_card))}")

    # NOTE: the default dict is only read here and handed on to write_molecule_to_file; it is never modified in this method
    if isinstance(link0, dict):
        link0 = [link0 for _ in ensemble._items]
    elif isinstance(link0, list):
        assert len(link0) == n_geometries, f"expected {n_geometries} link0 entries, but got {len(link0)}"
        for d in link0:
            assert isinstance(d, dict), f"expected dict for link0 but got {str(type(d))}"
    else:
        raise ValueError(f"unexpected type for link0: {str(type(link0))}")

    if footer is None or isinstance(footer, str):
        footer = [footer for _ in ensemble._items]
    elif isinstance(footer, list):
        assert len(footer) == n_geometries, f"expected {n_geometries} footers, but got {len(footer)}"
        for f in footer:
            assert f is None or isinstance(f, str), f"expected str or None for footer but got {str(type(f))}"
    else:
        raise ValueError(f"unexpected type for footer: {str(type(footer))}")

    if isinstance(title, str):
        assert len(title.strip()) > 0, "zero-length titles not allowed"
        title = [title for _ in ensemble._items]
    elif isinstance(title, list):
        assert len(title) == n_geometries, f"expected {n_geometries} titles but got {len(title)}"
        for card in title:
            assert isinstance(card, str), "expected title to be a str"
            # check the individual entry: the list itself has no strip()
            assert len(card.strip()) > 0, "zero-length titles are not allowed"
    else:
        raise ValueError(f"unexpected type for title: {str(type(title))}")

    if isinstance(print_symbol, bool):
        print_symbol = [print_symbol for _ in ensemble._items]
    elif isinstance(print_symbol, list):
        assert len(print_symbol) == n_geometries, f"expected {n_geometries} print_symbol entries but got {len(print_symbol)}"
        for s in print_symbol:
            assert isinstance(s, bool), f"expected bool for print_symbol but got {str(type(s))}"
    else:
        raise ValueError(f"unexpected type for print_symbol: {str(type(print_symbol))}")

    for idx, molecule in enumerate(ensemble._items):
        # the first link creates/overwrites the file; all later links are appended to it
        cls.write_molecule_to_file(
            filename,
            molecule,
            route_card[idx],
            link0[idx],
            footer=footer[idx],
            title=title[idx],
            print_symbol=print_symbol[idx],
            append=(idx != 0),
        )
class Group(cctk.Molecule):
    """
    Class representing a functional group.

    Note that a Group instance does not need to be missing atoms. Rather, the atom given by `attach_to` will be replaced wholesale by another molecule, and the bond distances scaled automatically.

    Attributes:
        attach_to (int): atom number to replace with larger fragment. must have only one bond! (e.g. H in F3C-H)
        adjacent (int): atom number that will be bonded to new molecule. (e.g. C in F3C-H)
        isomorphic (list of lists): list of lists of atoms that should be considered symmetry equivalent.
            For instance, the three methyl protons can be considered symmetry equivalent, so ``methane.isomorphic = [[3, 4, 5]]``.
        _map_from_truncated(dict): a dictionary mapping atom numbers of the group without ``attach_to`` to the atom numbers of the normal group
    """

    def __init__(self, attach_to, isomorphic=None, **kwargs):
        """
        Builds the underlying molecule from ``kwargs`` and registers ``attach_to`` as the attachment point.

        Args:
            attach_to (int): atom number that will be replaced when the group is added to a molecule
            isomorphic (list of lists): optional symmetry-equivalent atom sets
            **kwargs: passed unchanged to the parent class constructor
        """
        super().__init__(**kwargs)
        self.add_attachment_point(attach_to)
        self._map_from_truncated = None

        if isomorphic is not None:
            assert isinstance(isomorphic, list), "group.isomorphic must be list of lists!"
        self.isomorphic = isomorphic

    @classmethod
    def new_from_molecule(cls, molecule, attach_to, **kwargs):
        """
        Convenient method to convert ``molecule`` to ``group`` directly.

        Args:
            molecule (Molecule): source of atomic numbers, geometry and bonds
            attach_to (int): atom number to use as the attachment point
            **kwargs: additional arguments for the ``Group`` constructor

        Returns:
            new ``Group`` object
        """
        group = Group(attach_to, atomic_numbers=molecule.atomic_numbers, geometry=molecule.geometry, bonds=molecule.bonds.edges(), **kwargs)
        return group

    def add_attachment_point(self, attach_to):
        """
        Adds ``attach_to`` and ``adjacent`` attributes to the instance.

        Automatically centers atom ``adjacent`` on the origin, to simplify downstream mathematics.

        Raises:
            ValueError: if ``attach_to`` does not make exactly one bond
        """
        adjacent = super().get_adjacent_atoms(attach_to)
        n_bonds = len(adjacent)
        if n_bonds != 1:
            raise ValueError(f"atom {attach_to} is making {n_bonds} but must make 1 bond to be a valid attachment point")

        self.attach_to = attach_to
        self.adjacent = adjacent[0]

        adj_v = super().get_vector(self.adjacent)
        super().translate_molecule(-adj_v)

    @staticmethod
    def add_group_to_molecule(molecule, group, add_to, optimize=True, return_mapping=False):
        """
        Adds a `Group` object to a `Molecule` at the specified atom, and returns a new `Molecule` object (generated using `copy.deepcopy()`).
        Automatically attempts to prevent clashes by minimizing pairwise atomic distances.

        The atom in `group` that replaces `add_to` in `molecule` will inherit the number of `add_to` - however, the other atoms in `group` will be appended to the atom list.

        Args:
            molecule (Molecule): the molecule to change
            group (Group): the group to affix
            add_to (int): the 1-indexed atom number on `molecule` to add `group` to
            optimize (bool): whether or not to perform automated dihedral optimization
            return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products

        Returns:
            new Molecule object

            (optional) molecule_to_new dictionary mapping atom numbers from starting molecule (key) to new atom numbers (val)
            (optional) group_to_new dictionary mapping atom numbers from starting group (key) to new atom numbers (val)

        Raises:
            TypeError: if ``add_to`` cannot be converted to ``int``
            ValueError: if ``check_for_conflicts()`` reports a problem with the final structure
        """
        #### this code can be a bit complex: for an example, let's imagine converting benzene to toluene by adding methane (Group) to benzene (Molecule)
        ####    add_to would be the benzene H (atom on Molecule you replace with the new group)
        ####    adjacent_atom would be the benzene C
        ####    group.attach_to would be the methane H
        ####    group.adjacent would be the methane C

        try:
            add_to = int(add_to)
        except (TypeError, ValueError, OverflowError) as e:
            raise TypeError("add_to not castable to int") from e

        #### prevent in-place modification of molecule - could lead to pernicious errors!
        molecule = copy.deepcopy(molecule)
        molecule._check_atom_number(add_to)
        original_num_atoms = molecule.num_atoms()

        adjacent_atom = molecule.get_adjacent_atoms(add_to)
        assert (
            len(adjacent_atom) > 0
        ), "can't substitute an atom without an adjacent atom! (are there bonds defined for this molecule? consider calling molecule.assign_connectivity()!)"
        assert len(adjacent_atom) == 1, "can't substitute an atom with more than one adjacent atom!"
        adjacent_atom = adjacent_atom[0]

        #### mask selecting every group atom except the attachment point and the atom bonded to it
        attach_to = group.attach_to
        other_indices = np.ones_like(group.atomic_numbers).astype(bool)
        other_indices[attach_to] = False
        other_indices[group.adjacent] = False

        #### we need to change the bond length somewhat to prevent strange behavior
        old_radius = get_covalent_radius(molecule.atomic_numbers[add_to])
        new_radius = get_covalent_radius(group.atomic_numbers[group.adjacent])
        delta_rad = new_radius - old_radius

        #### make the swap! (this only adds the atoms, still have to get the geometry right)
        molecule.atomic_numbers[add_to] = group.atomic_numbers[group.adjacent]
        new_indices = [i + molecule.num_atoms() for i in range(1, np.sum(other_indices) + 1)]
        molecule.atomic_numbers = np.hstack([molecule.atomic_numbers, group.atomic_numbers[other_indices]])
        molecule.atomic_numbers = molecule.atomic_numbers.view(cctk.OneIndexedArray)

        #### have to keep track of what all the new indices are, to carry over connectivity
        new_indices.insert(group.adjacent - 1, add_to)
        new_indices.insert(attach_to - 1, adjacent_atom)

        #### track atom number mapping
        molecule_to_new = {z : z for z in range(1, molecule.num_atoms() + 1)}
        molecule_to_new[add_to] = None

        group_to_new = {}
        offset = 1
        for z in range(1, group.num_atoms() + 1):
            if other_indices[z]:
                group_to_new[z] = original_num_atoms + offset
                offset += 1
            else:
                group_to_new[z] = None
        group_to_new[group.adjacent] = add_to

        #### adjust the bond length by moving add_to
        molecule.set_distance(adjacent_atom, add_to, molecule.get_distance(adjacent_atom, add_to) + delta_rad)

        #### rotate group to match the new positioning
        v_g = group.get_vector(group.attach_to, group.adjacent)
        v_m = molecule.get_vector(add_to, adjacent_atom)
        theta = compute_angle_between(v_g, v_m)

        #### rotate each atom and add it...
        center_pos = molecule.get_vector(add_to)
        rot = compute_rotation_matrix(np.cross(v_g, v_m), -(180 - theta))
        for vector in group.geometry[other_indices]:
            new_v = np.dot(rot, vector) + center_pos
            molecule.geometry = np.vstack((molecule.geometry, new_v))
            molecule.geometry = molecule.geometry.view(cctk.OneIndexedArray)

        #### now we have to merge the new bonds
        for (atom1, atom2) in group.bonds.edges():
            molecule.add_bond(new_indices[atom1-1], new_indices[atom2-1])
        assert molecule.get_bond_order(add_to, adjacent_atom), "we didn't add the bond we were supposed to form!"

        assert len(molecule.atomic_numbers) == len(
            molecule.geometry
        ), f"molecule has {len(molecule.atomic_numbers)} atoms but {len(molecule.geometry)} geometry elements!"

        #### now we want to find the "lowest" energy conformation, defined as the rotamer which minimizes the RMS distance between all atoms
        if group.num_atoms() > 3 and optimize:
            adjacent_on_old_molecule = molecule.get_adjacent_atoms(adjacent_atom)[0]
            adjacent_on_new_molecule = molecule.get_adjacent_atoms(add_to)[-1]
            molecule.optimize_dihedral(adjacent_on_old_molecule, adjacent_atom, add_to, adjacent_on_new_molecule)

        if not molecule.check_for_conflicts():
            raise ValueError("molecule contains conflicts!")

        if return_mapping:
            return molecule, molecule_to_new, group_to_new
        return molecule

    @staticmethod
    def remove_group_from_molecule(molecule, atom1, atom2, return_mapping=False):
        """
        The microscopic reverse of ``add_group_to_molecule`` -- splits a ``Molecule`` along the ``atom1``–``atom2`` bond
        and returns a new ``Molecule`` object (the ``atom1`` side) and a new ``Group`` (the ``atom2`` side).

        The new objects will be capped with hydrogens; atom ordering will be preserved!

        Args:
            molecule (Molecule): the molecule to change
            atom1 (int): the 1-indexed atom number on `molecule` to make part of the new ``Molecule`` object
            atom2 (int): the 1-indexed atom number on `molecule` to make part of the new ``Group`` object
            return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products

        Returns:
            new Molecule object
            new Group object

            (optional) molecule_to_molecule dictionary mapping atom numbers from starting molecule (key) to new molecule atom numbers (val)
            (optional) molecule_to_group dictionary mapping atom numbers from starting molecule (key) to new group atom numbers (val)

        Raises:
            TypeError: if ``atom1`` or ``atom2`` cannot be converted to ``int``
            AssertionError: if a bond other than ``atom1``–``atom2`` connects the two fragments
        """
        try:
            atom1 = int(atom1)
            atom2 = int(atom2)
        except (TypeError, ValueError, OverflowError) as e:
            raise TypeError("atom numbers not castable to int") from e

        #### work on a copy so the caller's molecule is untouched
        molecule = copy.deepcopy(molecule)
        molecule._check_atom_number(atom1)
        molecule._check_atom_number(atom2)

        #### define mapping dicts
        fragment1, fragment2 = molecule._get_bond_fragments(atom1, atom2)
        molecule_to_molecule = {x: i+1 for i, x in enumerate(fragment1)}
        molecule_to_group = {x: i+1 for i, x in enumerate(fragment2)}

        #### create new molecules
        new_mol = cctk.Molecule(molecule.atomic_numbers[fragment1], molecule.geometry[fragment1])
        group = cctk.Molecule(molecule.atomic_numbers[fragment2], molecule.geometry[fragment2])

        #### add capping H to new_mol (placed where atom2 was, then pulled in to an X-H distance)
        new_mol.add_atom("H", molecule.geometry[atom2])
        molecule_to_molecule[atom2] = new_mol.num_atoms()
        old_radius = get_covalent_radius(molecule.atomic_numbers[atom2])
        H_radius = get_covalent_radius(1)
        new_dist = new_mol.get_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2]) - old_radius + H_radius
        new_mol.set_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2], new_dist)
        new_mol.add_bond(molecule_to_molecule[atom1], molecule_to_molecule[atom2])

        #### add capping H to new group (placed where atom1 was)
        group.add_atom("H", molecule.geometry[atom1])
        molecule_to_group[atom1] = group.num_atoms()
        old_radius = get_covalent_radius(molecule.atomic_numbers[atom1])
        new_dist = group.get_distance(molecule_to_group[atom2], molecule_to_group[atom1]) - old_radius + H_radius
        group.set_distance(molecule_to_group[atom2], molecule_to_group[atom1], new_dist)
        group.add_bond(molecule_to_group[atom2], molecule_to_group[atom1])

        #### add bonds to nascent molecules
        molecule.remove_bond(atom1, atom2)
        for (a1, a2) in molecule.bonds.edges():
            if a1 in fragment1:
                assert a2 in fragment1, "somehow we have another bond between the two groups!"
                assert molecule_to_molecule[a1] is not None, f"we don't have a mapping for atom {a1}"
                assert molecule_to_molecule[a2] is not None, f"we don't have a mapping for atom {a2}"
                new_mol.add_bond(molecule_to_molecule[a1], molecule_to_molecule[a2])
            elif a1 in fragment2:
                #### branch on a1 so a (fragment2, fragment1) bond trips the assertion instead of being skipped
                assert a2 in fragment2, "somehow we have another bond between the two groups!"
                assert molecule_to_group[a1] is not None, f"we don't have a mapping for atom {a1}"
                assert molecule_to_group[a2] is not None, f"we don't have a mapping for atom {a2}"
                group.add_bond(molecule_to_group[a1], molecule_to_group[a2])

        #### create Group object from group
        group = cctk.Group.new_from_molecule(attach_to=molecule_to_group[atom1], molecule=group)

        if return_mapping:
            return new_mol, group, molecule_to_molecule, molecule_to_group
        return new_mol, group

    def map_from_truncated(self):
        """
        Returns a dictionary mapping atom numbers of the group without ``attach_to`` (key) to atom numbers of the full group (val).

        The result is cached in ``self._map_from_truncated`` after the first successful call.
        Returns ``None`` if no subgraph isomorphism that omits ``attach_to`` is found.
        """
        if self._map_from_truncated is not None:
            return self._map_from_truncated

        assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
        g = copy.deepcopy(self)
        g._add_atomic_numbers_to_nodes()
        tg = copy.deepcopy(g)
        tg.remove_atom(g.attach_to)

        nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)
        match = nx.algorithms.isomorphism.GraphMatcher(g.bonds, tg.bonds, node_match=nm)

        for sg in match.subgraph_isomorphisms_iter():
            #### skip matches that use the attachment atom, which the truncated group lacks
            if self.attach_to in sg.keys():
                continue
            sg = {v: k for k, v in sg.items()}  # invert
            self._map_from_truncated = sg
            return sg
b/build/lib/cctk/groups/CHOH.mol2 new file mode 100644 index 0000000..714fd2c --- /dev/null +++ b/build/lib/cctk/groups/CHOH.mol2 @@ -0,0 +1,23 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +4 3 +SMALL +NO_CHARGES + + +@ATOM +1 C1 -1.0756 -1.3886 0.0424 C +2 H2 -1.6678 -0.4492 0.0424 H +3 H3 -1.6678 -2.3280 0.0425 H +4 O4 0.1828 -1.3886 0.0424 O +@BOND +1 1 2 1 +2 1 3 1 +3 1 4 2 diff --git a/build/lib/cctk/groups/ClH.mol2 b/build/lib/cctk/groups/ClH.mol2 new file mode 100644 index 0000000..924cf5f --- /dev/null +++ b/build/lib/cctk/groups/ClH.mol2 @@ -0,0 +1,19 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +2 1 +SMALL +NO_CHARGES + + +@ATOM +1 Cl1 0.2861 0.4087 0.0000 Cl +2 H2 -1.0039 0.4087 0.0000 H +@BOND +1 1 2 1 diff --git a/build/lib/cctk/groups/EtH.mol2 b/build/lib/cctk/groups/EtH.mol2 new file mode 100644 index 0000000..5556704 --- /dev/null +++ b/build/lib/cctk/groups/EtH.mol2 @@ -0,0 +1,31 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +8 7 +SMALL +NO_CHARGES + + +@ATOM +1 C1 -0.2897 0.6009 0.0000 C +2 H2 0.0670 -0.4080 0.0000 H +3 H3 0.0670 1.1053 -0.8737 H +4 H4 -1.3597 0.6009 0.0000 H +5 C5 0.2237 1.3268 1.2574 C +6 H6 -0.1314 2.3362 1.2565 H +7 H7 -0.1346 0.8235 2.1311 H +8 H8 1.2937 1.3251 1.2584 H +@BOND +1 1 2 1 +2 1 3 1 +3 1 4 1 +4 1 5 1 +5 5 6 1 +6 5 7 1 +7 5 8 1 diff --git a/build/lib/cctk/groups/FH.mol2 b/build/lib/cctk/groups/FH.mol2 new file mode 100644 index 0000000..4f303aa --- /dev/null +++ b/build/lib/cctk/groups/FH.mol2 @@ -0,0 +1,19 @@ +# Title +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +2 1 +SMALL +NO_CHARGES + + +@ATOM +1 F1 0.2861 0.4087 0.0000 F +2 H2 -0.5939 0.4087 0.0000 H +@BOND +1 1 2 1 diff --git a/build/lib/cctk/groups/HCN.mol2 b/build/lib/cctk/groups/HCN.mol2 new file mode 100644 index 0000000..779ebd6 --- /dev/null +++ b/build/lib/cctk/groups/HCN.mol2 @@ -0,0 +1,21 @@ +# 
Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +3 2 +SMALL +NO_CHARGES + + +@ATOM +1 C1 -0.2764 0.5632 0.0000 C +2 H2 -1.3454 0.5632 0.0000 H +3 N3 0.8702 0.5632 0.0000 N +@BOND +1 1 2 1 +2 1 3 3 diff --git a/build/lib/cctk/groups/HCO2Me.mol2 b/build/lib/cctk/groups/HCO2Me.mol2 new file mode 100644 index 0000000..993ced6 --- /dev/null +++ b/build/lib/cctk/groups/HCO2Me.mol2 @@ -0,0 +1,31 @@ +# Title +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +8 7 +SMALL +NO_CHARGES + + +@ATOM +1 C1 -0.5889 -0.6103 0.0000 C +2 H2 -1.1825 0.3138 0.0000 H +3 O3 0.6695 -0.6103 0.0000 O +4 O4 -1.3618 -1.8134 -0.0000 O +5 C5 -0.7063 -3.0843 -0.0024 C +6 H6 -0.0937 -3.1713 0.8705 H +7 H7 -1.4394 -3.8637 -0.0028 H +8 H8 -0.0954 -3.1689 -0.8768 H +@BOND +1 1 2 1 +2 1 3 2 +3 1 4 1 +4 4 5 1 +5 5 6 1 +6 5 7 1 +7 5 8 1 diff --git a/build/lib/cctk/groups/HNO2.mol2 b/build/lib/cctk/groups/HNO2.mol2 new file mode 100644 index 0000000..fe6ece2 --- /dev/null +++ b/build/lib/cctk/groups/HNO2.mol2 @@ -0,0 +1,23 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +4 3 +SMALL +NO_CHARGES + + +@ATOM +1 N1 -0.2764 0.5632 0.0000 N +2 H2 -0.7519 -0.3165 -0.0000 H +3 O3 -0.9543 1.5986 0.0000 O +4 O4 0.9612 0.5632 0.0000 O +@BOND +1 1 2 1 +2 1 3 Ar +3 1 4 Ar diff --git a/build/lib/cctk/groups/IH.mol2 b/build/lib/cctk/groups/IH.mol2 new file mode 100644 index 0000000..d94917f --- /dev/null +++ b/build/lib/cctk/groups/IH.mol2 @@ -0,0 +1,19 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +2 1 +SMALL +NO_CHARGES + + +@ATOM +1 I1 0.2861 0.4087 0.0000 I +2 H2 -1.3439 0.4087 0.0000 H +@BOND +1 1 2 1 diff --git a/build/lib/cctk/groups/MeH.mol2 b/build/lib/cctk/groups/MeH.mol2 new file mode 100644 index 0000000..3da4b48 --- /dev/null +++ b/build/lib/cctk/groups/MeH.mol2 @@ -0,0 +1,25 @@ +# Title +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +5 4 +SMALL 
+NO_CHARGES + + +@ATOM +1 C1 -0.2897 0.6009 0.0000 C +2 H2 0.0670 -0.4080 0.0000 H +3 H3 0.0670 1.1053 0.8737 H +4 H4 0.0670 1.1053 -0.8737 H +5 H5 -1.3597 0.6009 0.0000 H +@BOND +1 1 2 1 +2 1 3 1 +3 1 4 1 +4 1 5 1 diff --git a/build/lib/cctk/groups/NH3.mol2 b/build/lib/cctk/groups/NH3.mol2 new file mode 100644 index 0000000..69f5ff0 --- /dev/null +++ b/build/lib/cctk/groups/NH3.mol2 @@ -0,0 +1,23 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +4 3 +SMALL +NO_CHARGES + + +@ATOM +1 N1 -0.2897 0.6009 0.0000 N +2 H2 0.0436 -0.3419 0.0000 H +3 H3 0.0436 1.0723 0.8165 H +4 H4 0.0436 1.0723 -0.8165 H +@BOND +1 1 2 1 +2 1 3 1 +3 1 4 1 diff --git a/build/lib/cctk/groups/NHAcH.mol2 b/build/lib/cctk/groups/NHAcH.mol2 new file mode 100644 index 0000000..4276eae --- /dev/null +++ b/build/lib/cctk/groups/NHAcH.mol2 @@ -0,0 +1,33 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +9 8 +SMALL +NO_CHARGES + + +@ATOM +1 N1 -0.2764 0.5632 0.0000 N +2 H2 0.7069 0.3810 -0.0017 H +3 C3 -2.3071 1.9480 0.0000 C +4 H4 -2.6637 1.8678 1.0056 H +5 H5 -2.6643 2.8587 -0.4335 H +6 H6 -2.6633 1.1170 -0.5721 H +7 C7 -0.7671 1.9489 0.0000 C +8 O8 -0.0196 3.0120 -0.0656 O +9 H9 -0.9258 -0.1972 0.0017 H +@BOND +1 1 2 1 +2 1 7 1 +3 1 9 1 +4 3 4 1 +5 3 5 1 +6 3 6 1 +7 3 7 1 +8 7 8 Ar diff --git a/build/lib/cctk/groups/NMe2H.mol2 b/build/lib/cctk/groups/NMe2H.mol2 new file mode 100644 index 0000000..4128bc5 --- /dev/null +++ b/build/lib/cctk/groups/NMe2H.mol2 @@ -0,0 +1,35 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +10 9 +SMALL +NO_CHARGES + + +@ATOM +1 N1 -0.2764 0.5632 0.0000 N +2 H2 0.7236 0.5638 -0.0018 H +3 C3 -0.7638 -0.1292 1.2016 C +4 H4 -1.8338 -0.1294 1.2038 H +5 H5 -0.4054 0.3757 2.0743 H +6 H6 -0.4070 -1.1380 1.2014 H +7 C7 -0.7671 1.9489 0.0000 C +8 H8 -0.4122 2.4529 -0.8746 H +9 H9 -1.8371 1.9483 0.0019 H +10 H10 -0.4091 2.4540 0.8727 H +@BOND +1 1 2 1 +2 1 3 1 
+3 1 7 1 +4 3 4 1 +5 3 5 1 +6 3 6 1 +7 7 8 1 +8 7 9 1 +9 7 10 1 diff --git a/build/lib/cctk/groups/OH2.mol2 b/build/lib/cctk/groups/OH2.mol2 new file mode 100644 index 0000000..7960d76 --- /dev/null +++ b/build/lib/cctk/groups/OH2.mol2 @@ -0,0 +1,21 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +3 2 +SMALL +NO_CHARGES + + +@ATOM +1 O1 -0.2897 0.6009 0.0000 O +2 H2 0.6703 0.6009 0.0000 H +3 H3 -0.6102 1.5058 0.0000 H +@BOND +1 1 2 1 +2 1 3 1 diff --git a/build/lib/cctk/groups/OMeH.mol2 b/build/lib/cctk/groups/OMeH.mol2 new file mode 100644 index 0000000..15f674b --- /dev/null +++ b/build/lib/cctk/groups/OMeH.mol2 @@ -0,0 +1,27 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +6 5 +SMALL +NO_CHARGES + + +@ATOM +1 O1 -0.2897 0.6009 0.0000 O +2 H2 0.6703 0.6009 -0.0018 H +3 C3 -0.7671 1.9489 0.0000 C +4 H4 -0.4107 2.4535 -0.8737 H +5 H5 -1.8371 1.9483 0.0000 H +6 H6 -0.4107 2.4535 0.8737 H +@BOND +1 1 2 1 +2 1 3 1 +3 3 4 1 +4 3 5 1 +5 3 6 1 diff --git a/build/lib/cctk/groups/SF5H.mol2 b/build/lib/cctk/groups/SF5H.mol2 new file mode 100644 index 0000000..63f3905 --- /dev/null +++ b/build/lib/cctk/groups/SF5H.mol2 @@ -0,0 +1,29 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +7 6 +SMALL +NO_CHARGES + + +@ATOM +1 S1 0.2861 0.4087 0.0000 S +2 H2 1.5961 0.4087 0.0000 H +3 F3 0.2861 -1.1813 0.0000 F +4 F4 -1.3039 0.4087 0.0000 F +5 F5 0.2861 0.4087 1.5900 F +6 F6 0.2861 0.4087 -1.5900 F +7 F7 0.2861 1.9987 0.0000 F +@BOND +1 1 2 1 +2 1 3 1 +3 1 4 1 +4 1 5 1 +5 1 6 1 +6 1 7 1 diff --git a/build/lib/cctk/groups/SO3HH.mol2 b/build/lib/cctk/groups/SO3HH.mol2 new file mode 100644 index 0000000..d76b8bb --- /dev/null +++ b/build/lib/cctk/groups/SO3HH.mol2 @@ -0,0 +1,27 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +6 5 +SMALL +NO_CHARGES + + +@ATOM +1 S1 0.2861 0.4087 0.0000 S +2 H2 1.4395 0.8503 -0.4367 H +3 O3 
0.0384 -1.1462 -0.5567 O +4 O4 0.2861 0.4087 1.6700 O +5 O5 -0.9366 1.4006 -0.5567 O +6 H6 -0.5604 2.1046 -1.0901 H +@BOND +1 1 2 1 +2 1 3 1 +3 1 4 1 +4 1 5 1 +5 5 6 1 diff --git a/build/lib/cctk/groups/__init__.py b/build/lib/cctk/groups/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/build/lib/cctk/groups/iPrH.mol2 b/build/lib/cctk/groups/iPrH.mol2 new file mode 100644 index 0000000..6f72d60 --- /dev/null +++ b/build/lib/cctk/groups/iPrH.mol2 @@ -0,0 +1,37 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +11 10 +SMALL +NO_CHARGES + + +@ATOM +1 C1 -0.2897 0.6009 0.0000 C +2 H2 0.0670 -0.4080 0.0000 H +3 H3 0.0670 1.1053 -0.8737 H +4 C4 0.2237 1.3268 1.2574 C +5 H5 -0.1314 2.3362 1.2565 H +6 H6 -0.1346 0.8235 2.1311 H +7 H7 1.2937 1.3251 1.2584 H +8 C8 -1.8297 0.6009 0.0000 C +9 H9 -2.1864 1.6097 0.0004 H +10 H10 -2.1864 0.0968 -0.8739 H +11 H11 -2.1864 0.0961 0.8734 H +@BOND +1 1 2 1 +2 1 3 1 +3 1 4 1 +4 1 8 1 +5 4 5 1 +6 4 6 1 +7 4 7 1 +8 8 9 1 +9 8 10 1 +10 8 11 1 diff --git a/build/lib/cctk/groups/tBuH.mol2 b/build/lib/cctk/groups/tBuH.mol2 new file mode 100644 index 0000000..1540153 --- /dev/null +++ b/build/lib/cctk/groups/tBuH.mol2 @@ -0,0 +1,43 @@ +# Molecule Name +# Created by GaussView 6.0.16 +# + +# +# + +@MOLECULE +Molecule Name +14 13 +SMALL +NO_CHARGES + + +@ATOM +1 C1 -0.2897 0.6009 0.0000 C +2 H2 0.0670 -0.4080 0.0000 H +3 C3 0.2237 1.3268 1.2574 C +4 H4 -0.1314 2.3362 1.2565 H +5 H5 -0.1346 0.8235 2.1311 H +6 H6 1.2937 1.3251 1.2584 H +7 C7 -1.8297 0.6009 0.0000 C +8 H8 -2.1864 1.6097 0.0004 H +9 H9 -2.1864 0.0968 -0.8739 H +10 H10 -2.1864 0.0961 0.8734 H +11 C11 0.2237 1.3268 -1.2574 C +12 H12 1.2937 1.3270 -1.2573 H +13 H13 -0.1328 0.8223 -2.1311 H +14 H14 -0.1332 2.3356 -1.2576 H +@BOND +1 1 2 1 +2 1 3 1 +3 1 7 1 +4 1 11 1 +5 3 4 1 +6 3 5 1 +7 3 6 1 +8 7 8 1 +9 7 9 1 +10 7 10 1 +11 11 12 1 +12 11 13 1 +13 11 14 1 diff --git a/build/lib/cctk/helper_functions.py 
"""
This code populates ELEMENT_DICTIONARY and ISOTOPE_DICTIONARY from a static datafile.

ELEMENT_DICTIONARY maps atomic number (as a string) --> element symbol.
ISOTOPE_DICTIONARY maps atomic number (as a string) --> {isotope mass: abundance}.
INV_ELEMENT_DICTIONARY maps element symbol --> atomic number (as an int).
"""
ELEMENT_DICTIONARY = {}
ISOTOPE_DICTIONARY = {}

with pkg_resources.open_text(data, "isotopes.csv") as isotope_file:
    for line in isotope_file:
        symbol, number, mass, abundance = line.split(",")
        if symbol == "Symbol":
            # header row
            continue

        ELEMENT_DICTIONARY[number] = symbol

        # group isotopes by atomic number; keys stay strings throughout, so no
        # placeholder entry is ever created for the first element
        ISOTOPE_DICTIONARY.setdefault(number, {})[float(mass)] = float(abundance.rstrip())

# atomic number 0 is registered under the symbol "Bq"
ELEMENT_DICTIONARY["0"] = "Bq"

INV_ELEMENT_DICTIONARY = {v: int(k) for k, v in ELEMENT_DICTIONARY.items()}
def get_covalent_radius(atomic_number):
    """
    Gets the covalent radius for a given element.

    Args:
        atomic_number (int): the number of the given element

    Returns:
        the covalent radius in Angstroms (float)

    Raises:
        ValueError: if no covalent radius is tabulated for ``atomic_number``
    """
    # COVALENT_RADII_DICTIONARY is keyed by the atomic number as a string
    key = str(atomic_number)
    try:
        radius = COVALENT_RADII_DICTIONARY[key]
    except KeyError:
        # single formatted message (matches the style used by ``get_symbol``)
        raise ValueError(f"no covalent radius defined for atomic number {key}") from None
    return float(radius)
def compute_unit_vector(vector):
    """
    Scales ``vector`` to unit length, keeping its direction.

    A vector whose norm is zero cannot be normalized; in that case the input
    object itself is handed back unchanged.
    """
    length = np.linalg.norm(vector)
    return vector / length if length != 0 else vector
def compute_rotation_matrix(axis, theta):
    """
    Return the rotation matrix for rotation around ``axis`` by ``theta`` degrees.
    Adapted from user "unutbu" on StackExchange.

    Args:
        axis (np.ndarray): the vector to rotate about (3 elements; it is normalized internally)
        theta (float): how much to rotate (in degrees)

    Returns:
        the 3x3 rotation matrix

    Raises:
        TypeError: if ``axis`` is not a 3-element ``np.ndarray``, or if ``theta`` cannot be converted to ``float``
    """
    if (not isinstance(axis, np.ndarray)) or (len(axis) != 3):
        raise TypeError("axis must be np array with 3 elements")

    try:
        theta = float(theta)
    except (TypeError, ValueError) as e:
        # only conversion failures are translated; anything else propagates untouched
        raise TypeError("theta must be float!") from e

    theta = np.radians(theta)

    # normalize the axis; a zero-length axis is left as-is (same convention as ``compute_unit_vector``)
    norm = np.linalg.norm(axis)
    if norm != 0:
        axis = axis / norm

    # quaternion-style components of the rotation
    a = math.cos(theta / 2.0)
    b, c, d = -axis * math.sin(theta / 2.0)

    aa, bb, cc, dd = a * a, b * b, c * c, d * d
    bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
    return np.array(
        [
            [aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
            [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
            [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc],
        ]
    )
def compute_RMSD(geometry1, geometry2, checks=True):
    """
    Root-mean-square deviation between two geometries.

    The squared coordinate differences are summed over all entries and divided
    by the number of rows (``len(geometry1)``) before taking the square root.

    Args:
        geometry1 (np.array, dimensions: n atoms x 3): geometry
        geometry2 (np.array, dimensions: n atoms x 3): geometry
        checks (bool): whether to check that the inputs make sense (True by default)

    Returns:
        the root-mean-square distance between the two geometries

    Raises:
        ValueError: when ``checks`` is set and either input is not a ``cctk.OneIndexedArray``,
            or the two inputs differ in length
    """
    if checks:
        for geometry in (geometry1, geometry2):
            if not isinstance(geometry, cctk.OneIndexedArray):
                raise ValueError(f"expected cctk.OneIndexedArray but got {str(type(geometry))} instead")
        if len(geometry2) != len(geometry1):
            raise ValueError("can't compare two geometries with different lengths!")

    # drop to plain ndarrays before doing the arithmetic
    delta = geometry1.view(np.ndarray) - geometry2.view(np.ndarray)
    return np.sqrt(np.sum(delta ** 2) / len(geometry1))
def _as_result_array(rows):
    """
    Converts a list of per-molecule results into an ``np.ndarray``.

    Rows of equal length convert directly. If ``np.array`` rejects the input
    (ragged rows, or ``None`` mixed with lists), a 1D object array holding one
    entry per molecule is returned instead.
    """
    try:
        return np.array(rows)
    except ValueError:
        result = np.empty(len(rows), dtype=object)
        for index, row in enumerate(rows):
            result[index] = row
        return result


def scale_nmr_shifts(ensemble, symmetrical_atom_numbers=None, scaling_factors="default", property_name="isotropic_shielding"):
    """
    Apply linear scaling to isotropic shieldings to get chemical shifts.
    Shifts are calculated as (intercept-shielding)/slope.
    If there are no shifts available for a structure, None will be placed in both
    return lists.

    Args:
        ensemble: an ``Ensemble`` with calculated nmr shifts
        symmetrical_atom_numbers: None to perform no symmetry-averaging, or a list of lists
                                  of 1-indexed atom numbers (e.g. [ [2,4,5], [7,8] ]).
                                  The same groups are applied to every molecule in the ensemble.
        scaling_factors: "default" to use DEFAULT_NMR_SCALING_FACTORS or a dict
                         (atomic symbol --> (slope,intercept)). Elements for
                         which scaling factors are not provided will be ignored.
        property_name: the key in properties_dict to use to locate the predicted
                       isotropic shieldings (default="isotropic_shielding")

    Returns:
        scaled_shifts: np.array with one row per molecule (object array of lists/None if rows differ in length)
        shift_labels: np.array (same layout as ``scaled_shifts``)

    Raises:
        AssertionError: if any input fails validation
        ValueError: if a molecule has shieldings but none belong to a requested element
    """
    # check inputs
    assert isinstance(ensemble, cctk.Ensemble), f"expected Ensemble but got {str(type(ensemble))} instead"
    assert len(ensemble) > 0, "empty ensemble not allowed"
    if symmetrical_atom_numbers is None:
        symmetrical_atom_numbers = []
    assert isinstance(symmetrical_atom_numbers, list), f"symmetrical atom numbers should be specified as a list of lists, but got {str(type(symmetrical_atom_numbers))} instead"
    for group in symmetrical_atom_numbers:
        assert isinstance(group, list), f"symmetrical atom numbers must be specified as lists, but got {str(type(group))} instead: {str(group)}"
    if scaling_factors == "default":
        scaling_factors = DEFAULT_NMR_SCALING_FACTORS
    else:
        assert isinstance(scaling_factors, dict)
    assert len(scaling_factors) > 0, "must provide scaling factors"
    assert isinstance(property_name, str) and len(property_name)>0, f"property_name {property_name} is invalid"

    # get shieldings and scale
    all_scaled_shifts = []
    all_shift_labels = []
    for molecule, properties in ensemble.items():
        if property_name not in properties:
            # there are no shieldings available, so append None
            all_scaled_shifts.append(None)
            all_shift_labels.append(None)
            continue

        # get atom numbers and atomic elements as OneIndexedArrays
        atomic_numbers = molecule.atomic_numbers
        n_atoms = len(atomic_numbers)
        atomic_symbols = cctk.OneIndexedArray([ get_symbol(n) for n in atomic_numbers ])
        atom_numbers = list(range(1, n_atoms+1))
        all_labels = cctk.OneIndexedArray(
            [ f"{current_symbol}{atom_number}" for current_symbol, atom_number in zip(atomic_symbols, atom_numbers) ]
        )

        # check symmetrical atom numbers make sense
        symmetrical_groups_dict = {}    # symbol --> [ [list1], [list2], ...] where each list is a group of symmetrical atom numbers
        symmetrical_groups_dict2 = {}   # symbol --> [ union of all symmetrical atom numbers for this symbol ]
        for symmetrical_group in symmetrical_atom_numbers:
            assert len(symmetrical_group) > 1, "must be at least 2 symmetrical nuclei in a group"
            assert len(symmetrical_group) == len(set(symmetrical_group)), f"check for duplicate atom numbers in {symmetrical_group}"
            symmetrical_symbol = None
            for atom_number in symmetrical_group:
                assert 1 <= atom_number <= n_atoms, f"atom number {atom_number} is out of range"
                if symmetrical_symbol is None:
                    # the first atom of the group fixes the element for the whole group
                    symmetrical_symbol = atomic_symbols[atom_number]
                    assert symmetrical_symbol in scaling_factors, f"no scaling factors available for the element {symmetrical_symbol}"
                assert atomic_symbols[atom_number] == symmetrical_symbol,\
                    (f"all atoms in a symmetrical group must correspond to the same element\n"
                     f"expected element {symmetrical_symbol} for atom {atom_number},"
                     f"but got element {atomic_symbols[atom_number]}")
            symmetrical_groups_dict.setdefault(symmetrical_symbol, []).append(symmetrical_group)
            symmetrical_groups_dict2.setdefault(symmetrical_symbol, []).extend(symmetrical_group)

        # get shieldings
        all_shieldings = properties[property_name]

        # iterate through requested elements
        molecule_shifts = []
        molecule_labels = []
        for symbol_of_interest, (slope, intercept) in scaling_factors.items():
            # sanity checks
            assert isinstance(slope,float), f"expected slope to be float, but got {str(type(slope))}"
            assert slope != 0, "zero slope not allowed"
            assert isinstance(intercept,float), f"expected intercept to be float, but got {str(type(intercept))}"

            # determine unique atoms: right element, and not part of any symmetrical group
            grouped_atoms = set(symmetrical_groups_dict2.get(symbol_of_interest, []))
            unique_atom_numbers_list = [
                atom_number
                for atomic_symbol, atom_number in zip(atomic_symbols, atom_numbers)
                if atomic_symbol == symbol_of_interest and atom_number not in grouped_atoms
            ]

            # extract relevant shieldings and labels for unique atoms
            if len(unique_atom_numbers_list) > 0:
                selected_shieldings = list(all_shieldings[unique_atom_numbers_list])
                selected_labels = list(all_labels[unique_atom_numbers_list])
            else:
                selected_shieldings = []
                selected_labels = []

            # extract relevant shieldings and labels for symmetrical groups
            for symmetrical_group in symmetrical_groups_dict.get(symbol_of_interest, []):
                current_atomic_symbol = atomic_symbols[symmetrical_group[0]]
                if current_atomic_symbol == symbol_of_interest:
                    # one averaged shielding per group, labelled e.g. "H2/4/5"
                    selected_shieldings.append(all_shieldings[symmetrical_group].mean())
                    selected_labels.append(
                        f"{current_atomic_symbol}" + "/".join(f"{atom_number}" for atom_number in symmetrical_group)
                    )

            # apply scaling
            assert len(selected_shieldings) == len(selected_labels), "shieldings and labels should have 1:1 correspondence"
            selected_shifts = (intercept - np.array(selected_shieldings)) / slope

            # update results
            molecule_shifts.extend(selected_shifts)
            molecule_labels.extend(np.array(selected_labels))

        # update master results if appropriate
        if len(molecule_shifts) > 0:
            all_scaled_shifts.append(molecule_shifts)
            all_shift_labels.append(molecule_labels)
        else:
            # assume this means a bug
            raise ValueError("no relevant shieldings were extracted for this molecule!")

    # return result (tolerating molecules with different numbers of shifts, or missing shieldings)
    scaled_shifts = _as_result_array(all_scaled_shifts)
    shift_labels = _as_result_array(all_shift_labels)
    return scaled_shifts, shift_labels
# constants for calculating entropy
ENTROPY_FACTOR_1 = 1.43877695998381562  # 2.99792458E10 * 6.62606957E-34 / 1.3806488E-23
ENTROPY_FACTOR_2 = 1.9872041348  # 8.3144621 / 4.184

def get_entropy(frequencies, temperature):
    """
    Computes the total entropy of a given set of frequencies.

    Each frequency contributes ``x / (exp(x) - 1) - ln(1 - exp(-x))`` with
    ``x = ENTROPY_FACTOR_1 * frequency / temperature``, scaled by ``ENTROPY_FACTOR_2``.

    Args:
        frequencies (list): in cm-1
        temperature (float): in K

    Returns:
        entropy (float): the summed contributions divided by 627.509469.
            NOTE: with ``ENTROPY_FACTOR_2`` being 8.3144621 / 4.184, this is an entropy in
            cal/(mol*K) divided by 627.509469, not an energy in hartree;
            ``get_corrected_free_energy`` multiplies it by ``temperature / 1000`` to obtain hartree.

    Raises:
        ZeroDivisionError: if a frequency (or the resulting ``x``) is zero
        ValueError: if ``x`` is negative (logarithm of a negative number)
    """
    factor0 = ENTROPY_FACTOR_1 / temperature
    entropy = 0.0
    for frequency in frequencies:
        factor = factor0 * frequency
        # expm1 keeps full precision for small ``factor``, where exp(x) - 1 and
        # 1 - exp(-x) would otherwise suffer from cancellation
        temp = factor / math.expm1(factor) - math.log(-math.expm1(-factor))
        entropy += temp * ENTROPY_FACTOR_2
    return entropy / 627.509469
def numpy_to_bytes(arr):
    """
    Serializes a numpy array to ``bytes`` in the ``np.save`` format (pickling allowed).
    """
    with BytesIO() as buffer:
        np.save(buffer, arr, allow_pickle=True)
        return buffer.getvalue()

def bytes_to_numpy(arr_bytes):
    """
    Rebuilds a numpy array from ``bytes`` produced by ``numpy_to_bytes``.

    NOTE: loading is done with ``allow_pickle=True``, so this should only be fed trusted data.
    """
    return np.load(BytesIO(arr_bytes), allow_pickle=True)
def _recurse_through_formula(formula, masses, weights, cutoff=0.0000001, mass_precision=4, weight_precision=8):
    """
    Walks through a formula one atom at a time and generates the m/z isotopic pattern.

    Despite the name this is implemented as a loop, so the number of atoms is not limited
    by the interpreter's recursion depth, and the caller's ``formula`` is left untouched.

    To prevent blowup of memory, fragments with very low abundance are ignored. Masses and weights are also rounded after every step.
    To prevent error accumulation, internal precisions several orders of magnitude lower than the precision of interest should be employed.
    The default values should work nicely for low-res MS applications.

    Args:
        formula (np.ndarray, dtype=np.int8): vector containing atoms left to incorporate (index = atomic number).
            first element should always be 0 as there is no element 0.
        masses (np.ndarray or list): list of mass fragments to start from
        weights (np.ndarray or list): relative weights to start from
        cutoff (float): fragments whose weight is not above ``cutoff`` are discarded after each atom
        mass_precision (int): number of decimal places to store for mass
        weight_precision (int): number of decimal places to store for weight

    Returns:
        masses (np.ndarray): sorted in ascending order
        weights (np.ndarray): in the same order as ``masses``
    """
    # work on copies so neither the formula nor the starting lists are modified
    remaining = np.array(formula, copy=True)
    masses = np.asarray(masses, dtype=float)
    weights = np.asarray(weights, dtype=float)

    # elements are consumed in order of increasing atomic number, one atom at a time
    for current_e in np.nonzero(remaining)[0]:
        e_masses, e_weights = get_isotopic_distribution(current_e)

        for _ in range(int(remaining[current_e])):
            # combinatorially add the new masses and weights to our current lists
            new_masses = np.add.outer(masses, e_masses).ravel()
            new_weights = np.multiply.outer(weights, e_weights).ravel()

            # merge fragments whose rounded masses coincide, summing their weights
            newer_masses, indices = np.unique(np.round(new_masses, decimals=mass_precision), return_inverse=True)
            newer_weights = np.bincount(indices.ravel(), weights=new_weights, minlength=len(newer_masses))
            newer_weights = np.round(newer_weights, decimals=weight_precision)

            # prune the low-abundance masses/weights and move on to the next atom
            above_cutoff = np.nonzero(newer_weights > cutoff)
            masses = newer_masses[above_cutoff]
            weights = newer_weights[above_cutoff]

    order = np.argsort(masses)
    return masses[order], weights[order]
class LazyLineObject:
    """
    Instead of storing ``lines`` as an array, this object can be used.
    It reduces the memory usage drastically! It looks up lines only when needed.

    Every access re-opens ``file`` and reads the requested lines from disk.

    Attributes:
        file (str): path to the file
        start (int): zero-indexed number of the first line
        end (int): zero-indexed number of the last line
    """
    def __init__(self, file, start, end):
        self.file = file
        self.start = start
        self.end = end

    def __len__(self):
        return self.end - self.start

    def __str__(self):
        return f"LazyLineObject for file {self.file}, lines {self.start}-{self.end}"

    # the printable and debugging representations are identical
    __repr__ = __str__

    def __iter__(self):
        """
        Yields lines ``start`` through ``end`` (inclusive) without their trailing newline.
        """
        with open(self.file, "r") as lines:
            for line in islice(lines, self.start, self.end + 1):
                yield line.rstrip("\n")

    def __getitem__(self, key):
        """
        Returns line ``start + key`` with trailing whitespace removed (``None`` if the file is too short).

        Raises:
            KeyError: if ``key >= len(self)``
        """
        if key >= len(self):
            raise KeyError("key too big")
        with open(self.file, "r") as lines:
            line = next(islice(lines, self.start + key, self.start + key + 1), None)
        return None if line is None else line.rstrip()

    def full_text(self):
        """
        Returns lines ``start`` through ``end`` (inclusive) as one string, each line stripped of
        trailing whitespace and terminated by a newline.
        """
        with open(self.file, "r") as lines:
            return "".join(line.rstrip() + "\n" for line in islice(lines, self.start, self.end + 1))

    def search_for_block(self, start, end, count=1, join=" ", max_len=1000, format_line=None):
        """
        Search through a file (lines) and locate a block starting with "start" (inclusive) and ending with "end" (exclusive).

        A block that is still open when the last line is reached is not reported.

        Args:
            start (str): a pattern that matches the start of the block (can contain special characters)
            end (str): a pattern that matches the end of the block (can contain special characters) - ``None`` removes this (so a selection of ``max_len`` lines is taken)
            count (int): how many matches to search for
            join (str): spacer between lines
            max_len (int): maximum length of matches, in lines (to prevent overflow)
            format_line (function): function to perform to each line before adding to match (e.g. remove leading space)

        Returns:
            a single match (str) if count == 1 or a list of matches (str) if count > 1.
            Matches that were not found are ``None``; ``None`` is also returned if count == 0.
        """
        assert isinstance(count, int), "count needs to be an integer"
        assert isinstance(max_len, int), "max_len needs to be an integer"
        assert isinstance(join, str), "join needs to be a string"

        if count == 0:
            return None

        def prepare(line):
            # leading whitespace is always dropped; ``format_line`` is applied afterwards if given
            stripped = line.lstrip()
            return stripped if format_line is None else format_line(stripped)

        current_match = ""
        current_len = 0
        match = [None] * count

        #### we want a regex that will never match anything - and quickly - so trying to match something before the start of the line works
        if end is None:
            end = "a^"

        start_pattern = re.compile(start)
        end_pattern = re.compile(end)

        index = 0
        for line in self:
            if current_match:
                if end_pattern.search(line) or current_len >= max_len:
                    # block finished: store it and start looking for the next one
                    match[index] = current_match
                    current_match = None
                    index += 1
                    current_len = 0

                    if index == count:
                        break
                else:
                    current_match = current_match + join + prepare(line)
                    current_len += 1
            elif start_pattern.search(line):
                current_match = prepare(line)
                current_len = 1

        if count == 1:
            return match[0]
        return match

    def find_parameter(self, parameter, expected_length, which_field, split_on=None, cast_to_float=True):
        """
        Extracts whitespace-separated fields from every line matching ``parameter``.

        Only lines that split into exactly ``expected_length`` non-empty fields are used.

        Args:
            parameter (string): regex to search for
            expected_length (int): how many fields there should be
            which_field (int or list): which field(s) the parameter is (zero-indexed)
            split_on (str): additional non-space field on which to split
            cast_to_float (Bool): whether or not to cast extracted value to float (values that cannot be cast become 0)

        Returns:
            a list of all the extracted values (each entry is a list if several fields were requested)

        Raises:
            TypeError: if ``expected_length`` or any entry of ``which_field`` is not an int
            ValueError: if a requested field lies beyond ``expected_length``, or ``parameter`` is not a valid regex
        """
        if not isinstance(which_field, list):
            which_field = [which_field]

        if not isinstance(expected_length, int):
            raise TypeError("expected_length must be type int!")

        for n in which_field:
            if not isinstance(n, int):
                raise TypeError("which_field must be type int!")
            if n >= expected_length:
                raise ValueError("can't expect a field after the last field!")

        try:
            pattern = re.compile(parameter)
        except (re.error, TypeError) as e:
            raise ValueError(f"pattern {parameter} cannot be compiled as a regex; try again!") from e

        matches = []
        for line in self:
            if not pattern.search(line):
                continue

            fields = re.split(" +", line)
            if split_on:
                fields = [piece for field in fields for piece in field.split(split_on)]
            fields = list(filter(None, fields))

            if len(fields) != expected_length:
                continue

            desired_fields = []
            for n in which_field:
                if cast_to_float:
                    try:
                        desired_fields.append(float(fields[n]))
                    except (ValueError, IndexError):
                        # unparseable values are recorded as 0 rather than aborting the scan
                        desired_fields.append(0)
                else:
                    desired_fields.append(fields[n])

            if len(desired_fields) == 1:
                matches.append(desired_fields[0])
            else:
                matches.append(desired_fields)
        return matches
import groups + +filenames = [ + "MeH.mol2", + "EtH.mol2", + "iPrH.mol2", + "tBuH.mol2", + "OH2.mol2", + "OMeH.mol2", + "NHAcH.mol2", + "NH3.mol2", + "NMe2H.mol2", + "CF3H.mol2", + "HCN.mol2", + "HNO2.mol2", + "HCO2Me.mol2", + "FH.mol2", + "ClH.mol2", + "BrH.mol2", + "IH.mol2", + "SF5H.mol2", + "SO3HH.mol2", + "AcH.mol2", + "CHOH.mol2", +] + +names = [ + ["methyl", "Me", "CH3",], + ["ethyl", "Et", "C2H5",], + ["isopropyl", "iPr", "iC3H7",], + ["tert-butyl", "tBu", "tC4H9",], + ["hydroxy", "OH",], + ["methoxy", "MeO", "OMe", "CH3O",], + ["acetamido", "NHAc",], + ["amino", "NH2",], + ["dimethylamino", "Me2N", "NMe2",], + ["trifluoromethyl", "CF3",], + ["cyano", "CN",], + ["nitro", "NO2",], + ["carboxylmethyl", "MeO2C", "CO2Me",], + ["fluoro", "F",], + ["chloro", "Cl",], + ["bromo", "Br",], + ["iodo", "I",], + ["pentafluorosulfanyl", "SF5",], + ["sulfonyl", "SO3H",], + ["acetyl", "Ac", "COMe",], + ["formyl", "CHO",], +] + +isomorphic = [ + [[3, 4, 5]], + None, + [[4, 8], [9, 10, 11, 5, 6, 7]], + [[3, 7, 11], [4, 5, 6, 8, 9, 10, 12, 13, 14]], + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, +] + +def load_group(name): + filename = None + iso = None + + for row in names: + if name in row: + filename = filenames[names.index(row)] + iso = isomorphic[names.index(row)] + break + + assert filename is not None, f"can't find name {name}!" + + with pkg_resources.path(groups, filename) as file: + mol = MOL2File.read_file(file).ensemble.molecules[0] + mol.assign_connectivity() + + #### every molecule is set so you need to attach to atom 2 + new_group = Group.new_from_molecule(attach_to=2, molecule=mol, isomorphic=iso) + return new_group + +def group_iterator(symmetric_only=False): + """ + Returns a generator over all *cctk*-predefined groups. 
+ """ + for row, iso in zip(names, isomorphic): + if symmetric_only: + if iso is None: + continue + yield load_group(row[0]) diff --git a/build/lib/cctk/mae_file.py b/build/lib/cctk/mae_file.py new file mode 100644 index 0000000..09d3e08 --- /dev/null +++ b/build/lib/cctk/mae_file.py @@ -0,0 +1,278 @@ +import re +import numpy as np +import networkx as nx + +from cctk import File, Ensemble, ConformationalEnsemble, Molecule +from cctk.helper_functions import get_number + + +class MAEFile(File): + """ + Class representing Maestro ``.mae`` files. + + Attributes: + name (str): name of file + ensemble (Ensemble): ``Ensemble`` or ``ConformationalEnsemble`` object + """ + + def __init__(self, name=None): + if isinstance(name, str): + self.name = name + + @classmethod + def read_file(cls, filename, name=None, **kwargs): + """ + Reads ``.mae`` file and generates a ``MAEFile`` instance. + + Args: + filename (str): path to file + name (str): name of the file + + Returns: + MAEFile object + property names (list) + property_values (list) + """ + + file = MAEFile(name=name) + + (geometries, symbols, bonds, p_names, p_vals, conformers) = cls._read_mae(filename, **kwargs) + atomic_numbers = np.array([get_number(z) for z in symbols], dtype=np.int8) + + if conformers == True: + file.ensemble = ConformationalEnsemble() + else: + file.ensemble = Ensemble() + + for geom in geometries: + file.ensemble.add_molecule(Molecule(atomic_numbers, geom, bonds=bonds.edges)) + + return file, p_names, p_vals + + @classmethod + def _read_mae( + cls, filename, contains_conformers="check", save_memory_for_conformers=True, print_status_messages=False, + ): + """ + Reads uncompressed Macromodel files. 
+ + Args: + filename (str): path to file + contains_conformers (str): one of ``check``, ``True``, or ``False`` + save_memory_for_conformers (Bool): + print_status_messages (Bool): + + Returns: + geometries (np.ndarray): array of 3-tuples of geometries + symbols (np.ndarray): array of atom symbols (str) + bonds (nx.Graph): ``NetworkX`` graph of bond information + property_names: + property_values: + contains_conformers (Bool): whether or not the file contains conformers + """ + # read file + if print_status_messages: + print(f"Reading {filename}...", end="", flush=True) + lines = super().read_file(filename) + if print_status_messages: + print(f"read {len(lines)} lines...", end="", flush=True) + + # initialize arrays + geometries = [] + symbols = [] + bonds = [] + property_names = [] + property_values = [] + this_geometry = None + this_symbols = None + this_bonds = None + this_property_names = None + this_property_values = None + + # parse file + i = 0 + current_block_type = None + while i < len(lines): + # read the current line + line = lines[i] + i += 1 + + # determine if we are in a molecule block + end_of_file = i + 1 == len(lines) + if current_block_type is None and (line.startswith("f_m_ct") or end_of_file): + # store the current results if any + if this_geometry is not None and len(this_geometry) > 0: + geometries.append(this_geometry) + symbols.append(this_symbols) + bonds.append(this_bonds) + property_names.append(this_property_names) + property_values.append(this_property_values) + + # prepare to read a new molecule + current_block_type = "property_names" + this_geometry = [] + this_symbols = [] + this_bonds = None + this_property_names = [] + this_property_values = [] + continue + + # read property names + elif current_block_type == "property_names": + line = line.strip() + if line.startswith("i_m_ct_format"): + next_line = lines[i].strip() + if next_line != ":::": + raise ValueError(f"expected ':::' here but line {i+1} is:\n{next_line}\n") + 
current_block_type = "property_values" + i += 1 + elif line.startswith(":::"): + raise ValueError(f"expected to see i_m_ct_format as the last property (line {i+1})") + else: + fields = re.split(" +", line) + if len(fields) != 1: + raise ValueError(f"unexpected number of fields in property name line: {line}") + this_property_names.append(line) + + # read property values + elif current_block_type == "property_values": + n_properties = len(this_property_names) + for j in range(n_properties): + this_property_values.append(lines[i + j]) + i += n_properties + current_block_type = "looking_for_geometry1" + + # look for geometry block + elif current_block_type == "looking_for_geometry1": + if line.startswith(" m_atom"): + current_block_type = "looking_for_geometry2" + elif current_block_type == "looking_for_geometry2": + if line.strip() == ":::": + current_block_type = "geometry_block" + + # parse geometry + elif current_block_type == "geometry_block": + line = line.strip() + if line == ":::": + current_block_type = "bond_block" + + # initialize bond connectivity graph + this_bonds = nx.Graph() + n_atoms = len(this_symbols) + this_bonds.add_nodes_from(range(1, n_atoms + 1)) + i += 7 + else: + fields = re.split(" +", line) + x, y, z = float(fields[2]), float(fields[3]), float(fields[4]) + this_geometry.append((x, y, z)) + symbol = fields[-1] + this_symbols.append(symbol) + + # parse bonds + elif current_block_type == "bond_block": + line = line.strip() + if line == ":::": + current_block_type = None + else: + fields = re.split(" +", line) + bond_number, atom1, atom2, bond_order = ( + int(fields[0]), + int(fields[1]), + int(fields[2]), + int(fields[3]), + ) + n_atoms = len(this_geometry) + if not 1 <= atom1 <= n_atoms or not 1 <= atom2 <= n_atoms: + raise ValueError(f"atom number out of range: {line}") + bond_order = int(fields[3]) + if bond_order <= 0: + raise ValueError(f"zero or negative bond order: {line}") + if this_bonds.number_of_edges() != bond_number - 1: + raise 
ValueError(f"non-sequential bond number (expected {this_bonds.number_of_edges()+1} but got {bond_number})") + if this_bonds.has_edge(atom1, atom2): + current_bond_order = this_bonds[atom1][atom2]["weight"] + if current_bond_order != bond_order: + raise ValueError(f"inconsistent bond order definition: {line}") + this_bonds.add_edge(atom1, atom2, weight=bond_order) + this_bonds.add_edge(atom2, atom1, weight=bond_order) + + # convert to numpy array + geometries = np.array(geometries) + symbols = np.array(symbols) + property_names = np.array(property_names) + property_values = np.array(property_values) + + # determine if these are conformers + if contains_conformers == "check": + contains_conformers = True + for this_symbols, this_bonds in zip(symbols[1:], bonds[1:]): + # must have the same symbols and bonds + if not (symbols[0] == this_symbols).all() or not nx.is_isomorphic(bonds[0], this_bonds): + contains_conformers = False + break + elif isinstance(contains_conformers, bool): + pass + else: + raise ValueError("contains_conformers must be 'check' or boolean") + + # if requested, just store one copy of symbols and bonds + if save_memory_for_conformers and contains_conformers: + symbols = symbols[0] + bonds = bonds[0] + + # return result + n_geometries = len(geometries) + if print_status_messages: + if n_geometries > 1: + if contains_conformers: + n_atoms = len(geometries[0]) + n_bonds = bonds.number_of_edges() + if print_status_messages: + print(f"read {n_geometries} conformers ({n_atoms} atoms and {n_bonds} bonds).") + else: + min_n_atoms = len(geometries[0]) + max_n_atoms = len(geometries[0]) + for geometry in geometries[1:]: + if len(geometry) > max_n_atoms: + max_n_atoms = len(geometry) + elif len(geometry) < min_n_atoms: + min_n_atoms = len(geometry) + min_n_bonds = bonds[0].number_of_edges() + max_n_bonds = bonds[0].number_of_edges() + for this_bonds in bonds[1:]: + if this_bonds.number_of_edges() > max_n_bonds: + max_n_bonds = this_bonds.number_of_edges() + 
elif this_bonds.number_of_edges() < min_n_bonds: + min_n_bonds = bonds.number_of_edges + if print_status_messages: + print(f"read {n_geometries} unrelated geometries ({min_n_atoms}-{max_n_atoms} atoms and {min_n_bonds}-{max_n_bonds}) bonds).") + else: + n_atoms = len(geometries) + n_bonds = bonds.number_of_edges() + if print_status_messages: + print(f"read one geometry ({n_atoms} atoms and {n_bonds} bonds).") + + # return result + return ( + geometries, + symbols, + bonds, + property_names, + property_values, + contains_conformers, + ) + + def get_molecule(self, num=None): + """ + Returns the last molecule from the ensemble. + + If ``num`` is specified, returns ``self.ensemble.molecules[num]`` + """ + # some methods pass num=None, which overrides setting the default above + if num is None: + num = -1 + + if not isinstance(num, int): + raise TypeError("num must be int") + + return self.ensemble.molecules[num] diff --git a/build/lib/cctk/mol2_file.py b/build/lib/cctk/mol2_file.py new file mode 100644 index 0000000..44dddea --- /dev/null +++ b/build/lib/cctk/mol2_file.py @@ -0,0 +1,351 @@ +import re +import numpy as np +import networkx as nx + +from cctk import File, Ensemble, ConformationalEnsemble, Molecule +from cctk.helper_functions import get_symbol, get_number + + +class MOL2File(File): + """ + Class representing SYBYL ``.mol2`` files. + + Attributes: + name (str): name of file + ensemble (Ensemble): ``Ensemble`` or ``ConformationalEnsemble`` object + """ + + def __init__(self, name=None): + if isinstance(name, str): + self.name = name + + @classmethod + def read_file(cls, filename, name=None, **kwargs): + """ + Reads ``.mol2`` file and generates a ``MOL2File`` instance. 
+ + Args: + filename (str): path to file + name (str): name of the file + + Returns: + MOL2File object + """ + + file = MOL2File(name=name) + + (geometries, all_clean_symbols, all_symbols, all_bonds, conformers) = cls._read_mol2(filename, **kwargs) + assert len(all_bonds) == len(geometries) + for bonds in all_bonds: + assert isinstance(bonds, nx.Graph) + assert len(bonds) == len(geometries[0]) + + if conformers: + # convert atom types to atomic numbers + atomic_numbers = [] + for atom_type in all_symbols[0]: + assert isinstance(atom_type,str), f"unexpected atom_type type: {type(atom_type)} / {atom_type}" + fields = atom_type.split(".") + symbol = fields[0] + symbol = re.sub("[^A-Za-z]","",symbol) + atomic_number = get_number(symbol) + atomic_numbers.append(atomic_number) + atomic_numbers = np.asarray(atomic_numbers, dtype=np.int8) + + # create ensemble + file.ensemble = ConformationalEnsemble() + for geometry in geometries: + molecule = Molecule(atomic_numbers, geometry, bonds=all_bonds[0].edges, checks=False) + file.ensemble.add_molecule(molecule, checks=False) + else: + file.ensemble = Ensemble() + for this_symbols,geometry in zip(all_symbols,geometries): + atomic_numbers=[] + for atom_type in this_symbols: + assert isinstance(atom_type,str), f"unexpected atom_type type: {type(atom_type)} / {atom_type}" + fields = atom_type.split(".") + symbol = fields[0] + symbol = re.sub("[^A-Za-z]","",symbol) + atomic_number = get_number(symbol) + atomic_numbers.append(atomic_number) + atomic_numbers = np.asarray(atomic_numbers, dtype=np.int8) + molecule = Molecule(atomic_numbers, geometry, bonds=bonds.edges) + file.ensemble.add_molecule(molecule) + + return file + + @classmethod + def _read_mol2( + cls, filename, contains_conformers="check", save_memory_for_conformers=True, print_status_messages=False, + ): + """ + Reads .mol2 files into cctk. 
+ + Args: + filename str): the name of the .mol2 file + + contains_conformers('check' or bool): if set to 'check', multiple geometries + in the same file will be compared to see + if they are conformers. Alternatively, + force the geometries to be treated as + conformers (True) or not (False). This + latter option increases performance, + particularly for large files. + + print_status_messages (bool): if True, update the progerss of the parsing operation to stdout. + + Returns: + all_geometries, all_clean_symbols, all_symbols, all_bonds, contains_conformers + + all_geometries: np.ndarray(geometry number, atom number, xyz) -> position (float) + all_clean_symbols: np.ndarray(geometry number, atom number) -> atom symbol (:obj:`str`) + all_symbols: np.ndarray(geometry number, atom number) -> atom symbol (:obj:`str`) + all_bonds: list(geometry_number) -> bond connectivity (:obj:`nx.Graph`) + contains_conformers: bool (True if the geometries correspond to conformers.) + """ + # read file + if print_status_messages: + print(f"Reading {filename}...", end="", flush=True) + lines = super().read_file(filename) + if print_status_messages: + print(f"read {len(lines)} lines...", end="", flush=True) + + # initialize arrays + all_geometries = [] + all_symbols = [] + all_clean_symbols = [] + all_bonds = [] + this_geometry = [] + this_symbols = [] + this_clean_symbols = [] + this_bonds = None + + # parse file + i = 0 + in_geometry_block = False + in_bond_block = False + bond_number = 0 + while i < len(lines): + # read the current line + line = lines[i] + + # determine if we are in a geometry block + if line.startswith("@ATOM"): + # step forward to the first geometry line + in_geometry_block = True + in_bond_block = False + i += 1 + line = lines[i] + if contains_conformers == True and len(all_symbols) > 0: + this_symbols = all_symbols[0] + this_clean_symbols = all_clean_symbols[0] + elif line.startswith("@BOND"): + # update status + in_geometry_block = False + in_bond_block = True + 
bond_number = 0 + + # get next line + i += 1 + line = lines[i] + + # initialize connectivity graph + if len(this_geometry) == 0: + raise ValueError("got to bond table without a geometry") + if contains_conformers == True and len(all_bonds) > 0: + this_bonds = all_bonds[0] + else: + this_bonds = nx.Graph() + this_bonds.add_nodes_from(range(1, len(this_geometry) + 1)) + + # parse geometry if appropriate + if in_geometry_block: + fields = line.split() + if len(fields) < 6: + print("Error parsing file:") + print("Line = '%s'" % line.strip()) + print(fields) + break + x, y, z = float(fields[2]), float(fields[3]), float(fields[4]) + this_geometry.append([x, y, z]) + if contains_conformers != True or len(all_symbols)==0: + symbol = fields[5] + clean_symbol = fields[1] + this_symbols.append(symbol) + this_clean_symbols.append(clean_symbol) + elif in_bond_block: + fields = line.split() + if len(fields) == 4 and (len(all_bonds)==0 or contains_conformers != True): + # parse bonds, checking that the bonds are increasing + try: + this_bond_number = int(fields[0]) + atom1 = int(fields[1]) + atom2 = int(fields[2]) + n_atoms = len(this_geometry) + if not 1 <= atom1 <= n_atoms or not 1 <= atom2 <= n_atoms: + raise ValueError(f"atom number out of range: {line}") + if fields[3] == "ar": + bond_order = 1 + else: + bond_order = int(fields[3]) + if bond_order <= 0: + raise ValueError(f"zero or negative bond order: {line}") + if this_bond_number != bond_number + 1: + raise ValueError("non-sequential bond number") + bond_number = this_bond_number + if this_bonds.has_edge(atom1, atom2): + current_bond_order = this_bonds[atom1][atom2]["weight"] + if current_bond_order != bond_order: + raise ValueError(f"inconsistent bond order definition: {line}") + this_bonds.add_edge(atom1, atom2, weight=bond_order) + this_bonds.add_edge(atom2, atom1, weight=bond_order) + except Exception as e: + # assume we have left the bond block + in_geometry_block = False + in_bond_block = False + else: + # we have 
left the bond block + in_geometry_block = False + in_bond_block = False + + # go to next line + i += 1 + + # store geometry and reinitialize if appropriate + end_of_file = i == len(lines) + end_of_blocks = not in_geometry_block and not in_bond_block + if (end_of_file or end_of_blocks) and len(this_geometry) > 0: + all_geometries.append(this_geometry) + all_clean_symbols.append(this_clean_symbols) + all_symbols.append(this_symbols) + all_bonds.append(this_bonds) + this_geometry = [] + this_symbols = [] + this_clean_symbols = [] + this_bonds = None + + # convert to numpy array + all_geometries = np.array(all_geometries) + all_symbols = np.array(all_symbols) + all_clean_symbols = np.array(all_clean_symbols) + + # determine if these are conformers + if contains_conformers == "check": + contains_conformers = True + for symbols, bonds in zip(all_symbols[1:], all_bonds[1:]): + # must have the same symbols and bonds + if not (all_symbols[0] == symbols).all() or not nx.is_isomorphic(all_bonds[0], bonds): + contains_conformers = False + break + elif isinstance(contains_conformers, bool): + pass + else: + raise ValueError("contains_conformers must be 'check' or boolean") + + # return result + n_geometries = len(all_geometries) + if print_status_messages: + if n_geometries > 1: + if contains_conformers: + n_atoms = len(all_geometries[0]) + n_bonds = all_bonds[0].number_of_edges() + if print_status_messages: + print(f"read {n_geometries} conformers ({n_atoms} atoms and {n_bonds} bonds).") + else: + min_n_atoms = len(all_geometries[0]) + max_n_atoms = len(all_geometries[0]) + for geometry in all_geometries[1:]: + if len(geometry) > max_n_atoms: + max_n_atoms = len(geometry) + elif len(geometry) < min_n_atoms: + min_n_atoms = len(geometry) + min_n_bonds = all_bonds[0].number_of_edges() + max_n_bonds = all_bonds[0].number_of_edges() + for bonds in all_bonds[1:]: + if bonds.number_of_edges() > max_n_bonds: + max_n_bonds = bonds.number_of_edges() + elif bonds.number_of_edges() < 
min_n_bonds: + min_n_bonds = bonds.number_of_edges + if print_status_messages: + print(f"read {n_geometries} unrelated geometries ({min_n_atoms}-{max_n_atoms} atoms and {min_n_bonds}-{max_n_bonds}) bonds).") + else: + n_atoms = len(all_geometries) + n_bonds = all_bonds[0].number_of_edges() + if print_status_messages: + print(f"read one geometry ({n_atoms} atoms and {n_bonds} bonds).") + + return (all_geometries, all_clean_symbols, all_symbols, all_bonds, contains_conformers) + + def get_molecule(self, num=None): + """ + Returns the last molecule from the ensemble. + + If ``num`` is specified, returns ``self.ensemble.molecules[num]`` + """ + # some methods pass num=None, which overrides setting the default above + if num is None: + num = -1 + + if not isinstance(num, int): + raise TypeError("num must be int") + + return self.ensemble.molecules[num] + + @classmethod + def write_molecule_to_file(cls, filename, molecule, title=None, append=False): + """ + Write a ``.gjf`` file using the given molecule. + + Args: + filename (str): path to the new file + molecule (Molecule): which molecule to use -- a``Molecule`` object. + title (str): title of the file + append (Bool): whether to write to file normally or append + """ + assert isinstance(molecule, Molecule), "molecule is not a valid Molecule object!" 
+ + text = f"# {title}\n#\n#\n\n#\n#\n\n" + text += f"@MOLECULE\n{title}\n{molecule.num_atoms()} {molecule.bonds.number_of_edges()}\nSMALL\nNO_CHARGES\n\n\n" + text += "@ATOM\n" + for idx, z in enumerate(molecule.atomic_numbers, start=1): + v = molecule.get_vector(idx) + text += f"{idx} {get_symbol(z)}{idx} {v[0]: .4f} {v[1]: .4f} {v[2]: .4f} {get_symbol(z)} 0\n" + text += "@BOND\n" + count = 1 + for atom1, atom2, weight in molecule.bonds.edges.data("weight", default=1): + text += f"{count} {atom1} {atom2} {weight}\n" + count += 1 + + if append: + super().append_to_file(filename, text) + else: + super().write_file(filename, text) + + def write_file(self, filename, molecule=-1, **kwargs): + """ + Write a ``.mol2`` file, using object attributes. + + Args: + filename (str): path to the new file + molecule (int): which molecule to use -- passed to ``self.get_molecule()``. + Default is -1 (e.g. the last molecule), but positive integers will select from self.ensemble.molecules (0-indexed). + A ``Molecule`` object can also be passed, in which case that molecule will be written to the file. + """ + if molecule is None or isinstance(molecule, (np.integer, int)): + molecule = self.ensemble.molecules[molecule] + self.write_molecule_to_file(filename, molecule, **kwargs) + + @classmethod + def write_ensemble_to_file(cls, filename, ensemble): + """ + Write each structure in the specified ensemble to a single mol2 file. 
+ + Args: + filename (str): where to write the file + ensemble (Ensemble): ``Ensemble`` object to write + """ + for idx, molecule in enumerate(ensemble.molecules): + if idx == 0: + cls.write_molecule_to_file(filename, molecule, append=False) + else: + cls.write_molecule_to_file(filename, molecule, append=True) diff --git a/build/lib/cctk/molecule.py b/build/lib/cctk/molecule.py new file mode 100644 index 0000000..2ccd3d3 --- /dev/null +++ b/build/lib/cctk/molecule.py @@ -0,0 +1,1832 @@ +import math, copy, re +import numpy as np +import networkx as nx +from scipy.spatial.distance import cdist +import pkg_resources +import yaml + +import cctk +from cctk.helper_functions import ( + get_symbol, + get_number, + compute_rotation_matrix, + compute_distance_between, + compute_angle_between, + compute_dihedral_between, + compute_unit_vector, + get_covalent_radius, + get_vdw_radius, + numpy_to_bytes, + bytes_to_numpy, + _recurse_through_formula, +) +import cctk.topology as top + +class Molecule: + """ + Class representing a single molecular geometry. + + In contrast to typical Python behavior, ``atomic_numbers`` and ``geometry`` are indexed from one, to simplify interfacing with computational chemistry programs. + This has been done by defining a custom wrapper for ``numpy.ndarray`` called ``cctk.OneIndexedArray``. + + All other datatypes are indexed from 0. + + Attributes: + name (str): for identification, optional + atomic_numbers (cctk.OneIndexedArray, dtype=np.int8): list of atomic numbers + geometry (cctk.OneIndexedArray): list of 3-tuples of xyz coordinates - same ordering as ``atomic_numbers`` + bonds (nx.Graph or list of tuples): connectivity graph or list of 2-tuples, with each element representing the 1-indexed atom number of a bonded pair + charge (int): the charge of the molecule + multiplicity (int): the spin state of the molecule (1 corresponds to singlet, 2 to doublet, 3 to triplet, etc. 
-- so a multiplicity of 1 is equivalent to S=0) + vibrational_modes (list of cctk.VibrationalMode): vibrational modes + """ + + def __init__(self, atomic_numbers, geometry, name=None, bonds=None, charge=0, multiplicity=1, checks=True): + """ + Create new Molecule object, and assign connectivity if needed. + + ``bonds`` must be a list of edges (i.e. an n x 2 ``numpy`` array). + + If ``checks`` is True, the atomic numbers in bonds will all be checked for consistency. + This option can be disabled by setting ``checks`` to False, but this is not recommended for external data. + """ + if len(atomic_numbers) != len(geometry): + raise ValueError(f"length of geometry ({len(geometry)}) and atomic_numbers ({len(atomic_numbers)}) does not match!\n{atomic_numbers}\n{geometry}") + + try: + atomic_numbers = np.asarray(atomic_numbers, dtype=np.int8).view(cctk.OneIndexedArray) + except Exception as e: + raise ValueError("invalid atom list") + + try: + geometry = np.array(geometry, dtype=np.float32).view(cctk.OneIndexedArray) + except Exception as e: + raise TypeError("geometry cannot be cast to ``np.ndarray`` of floats!") + + if name is not None: + if not isinstance(name, str): + raise TypeError("name must be a string!") + + if not isinstance(charge, int): + try: + charge = int(charge) + except Exception as e: + raise TypeError("charge must be integer or castable to integer!") + + if not isinstance(multiplicity, int): + try: + multiplicity = int(multiplicity) + except Exception as e: + raise TypeError("multiplicity must be positive integer or castable to positive integer") + assert multiplicity > 0, "multiplicity must be positive" + + self.atomic_numbers = atomic_numbers + self.geometry = geometry + + self.name = name + self.multiplicity = multiplicity + self.charge = charge + + self.vibrational_modes = list() + + if isinstance(bonds, nx.Graph): + self.bonds = bonds + elif isinstance(bonds, (list,np.ndarray,nx.classes.reportviews.EdgeView)): + if checks: + known_atomic_numbers = 
set() + for bond in bonds: + assert len(bond)==2, f"unexpected number of atoms in bond, expected 2, got {len(bond)}" + if bond[0] not in known_atomic_numbers: + self._check_atom_number(bond[0]) + known_atomic_numbers.add(bond[0]) + if bond[1] not in known_atomic_numbers: + self._check_atom_number(bond[1]) + known_atomic_numbers.add(bond[1]) + + self.bonds = nx.Graph() + self.bonds.add_nodes_from(range(1, len(atomic_numbers) + 1)) + self.bonds.add_edges_from(bonds, weight=1) + elif bonds is None: + self.bonds = nx.Graph() + self.bonds.add_nodes_from(range(1, len(atomic_numbers)+1)) + else: + raise ValueError(f"unexpected type for bonds: {type(bonds)}") + + def __str__(self): + if self.name is not None: + return f"Molecule (name={self.name}, {len(self.atomic_numbers)} atoms)" + else: + return f"Molecule ({len(self.atomic_numbers)} atoms)" + + def __repr__(self): + return str(self) # placeholder + +# def __eq__(self, other): + @classmethod + def equal(cls, mol1, mol2): + """ + Atomic numbers, geometry, charge, and multiplicity all must match. Name is irrelevant. + """ + if not isinstance(mol1, cctk.Molecule): + return False + + if not isinstance(mol2, cctk.Molecule): + return False + + comparisons = [ + np.array_equal(mol1.atomic_numbers, mol2.atomic_numbers), + np.array_equal(mol1.geometry, mol2.geometry), + mol1.charge == mol2.charge, + mol1.multiplicity == mol2.multiplicity + ] + + return all(comparisons) + + def assign_connectivity(self, cutoff=0.2, periodic_boundary_conditions=None): + """ + Automatically recalculates bonds based on covalent radii. If two atoms are closer than the sum of their covalent radii + ``cutoff`` Angstroms, + then they are considered bonded. + + Args: + cutoff (float): the threshold (in Angstroms) for how close two covalent radii must be to be considered bonded + + Returns: + self + """ + + #### delete all edges + self.bonds = nx.create_empty_copy(self.bonds) + + assert isinstance(cutoff, (float, int)), "need cutoff to be numeric!" 
+ g = self.geometry.view(np.ndarray) + + dist_matrix = None + + #### cdist is SO FAST + if periodic_boundary_conditions is None: + dist_matrix = cdist(g, g, "euclidean") + else: + # even 16 cdist calls is faster than any other implementation, i tested it + pbc = periodic_boundary_conditions + assert isinstance(pbc, np.ndarray) and len(pbc) == 3, "Need 3-element ``np.ndarray`` for PBCs" + + nearby_cells = [ + [0, 0, 0], + [pbc[0], 0, 0], + [0, pbc[1], 0], + [0, 0, pbc[2]], + [pbc[0], pbc[1], 0], + [pbc[0], 0, pbc[2]], + [0, pbc[1], pbc[2]], + [pbc[0], pbc[1], pbc[2]], + ] + + dist_matrices = [cdist(g, g + np.array(nc), "euclidean") for nc in nearby_cells] + dist_matrices += [cdist(g, g - np.array(nc), "euclidean") for nc in nearby_cells] + distances_3d = np.stack(dist_matrices) + dist_matrix = distances_3d.min(axis=0) + + covalent_radii = {z: get_covalent_radius(z) for z in set(self.atomic_numbers)} + radii_by_num = [covalent_radii[z] for z in self.atomic_numbers] + + for i in range(1, self.num_atoms() + 1): + r_i = radii_by_num[i-1] + for j in range(i + 1, self.num_atoms() + 1): + distance = dist_matrix[i-1][j-1] + r_j = radii_by_num[j-1] + + # 0.5 A distance is used by RasMol and Chime (documentation available online) and works well, empirically + if distance < (r_i + r_j + cutoff): + self.add_bond(i, j) + + return self + + def check_for_conflicts(self, min_buffer=1, group1=None, group2=None): + """ + Automatically checks for conflicts based on covalent radii. If two atoms are closer than the sum of their covalent radii + buffer, then they are considered clashing. + If `group1` and `group2` are selected, then conflicts will only be evaluated between these two groups of atoms. + + Args: + min_buffer (float): the threshold (in Angstroms) for how close two covalent radii must be to be considered clashing. 1.0 A is default, empirically. 
+ group1 (list): atoms to evaluate against `group2` (if `None`, defaults to all atoms) + group2 (list): atoms to evaluate against `group1` (if `None`, defaults to all atoms) + + Returns: + True if there are no conflicts + ValueError if there is a conflict + """ + + if group1 is None: + group1 = list(range(1, self.num_atoms() + 1)) + + if group2 is None: + group2 = list(range(1, self.num_atoms() + 1)) + + for atom in group1 + group2: + self._check_atom_number(atom) + + for i in group1: + for j in group2: + if i == j: + continue + distance = self.get_distance(i, j, check=False) + r_i = get_covalent_radius(self.get_atomic_number(i)) + r_j = get_covalent_radius(self.get_atomic_number(j)) + + # 0.5 A distance is used by RasMol and Chime (documentation available online) and works well, empirically + if distance < (r_i + r_j - min_buffer): +# raise ValueError(f"atoms {i} and {j} are too close - distance {distance} A!") + return False + + return True + + def add_bond(self, atom1, atom2, bond_order=1, check=True): + """ + Adds a new bond to the bond graph, or updates the existing bond order. Will not throw an error if the bond already exists. + + Args: + atom1 (int): the number of the first atom + atom2 (int): the number of the second atom + bond_order (int): bond order of bond between atom1 and atom2 + """ + if check: + self._check_atom_number(atom1) + self._check_atom_number(atom2) + assert isinstance(bond_order, int), f"bond order {bond_order} must be an integer" + assert bond_order >= 0, f"bond order {bond_order} must be positive" + + if self.bonds.has_edge(atom1, atom2): + if bond_order == 0: + self.bonds.remove_edge(atom1, atom2) + else: + if self.bonds[atom1][atom2]["weight"] != bond_order: + self.bonds[atom1][atom2]["weight"] = bond_order + elif bond_order > 0: + self.bonds.add_edge(atom1, atom2, weight=bond_order) + + def remove_bond(self, atom1, atom2): + """ + Alias for ``self.add_bond(atom1, atom2, bond_order=0)`` -- more intuitive nomenclature. 
+ """ + self.add_bond(atom1, atom2, bond_order=0) + + def _check_atom_number(self, number): + """ + Helper method which performs quick checks on the validity of a given atom number. + """ + assert isinstance(number, int), "atomic number must be integer" + assert 0 < number <= self.num_atoms(), "atom number {number} too large! (or too small - needs to be >0)" + + def formula(self, return_dict=False): + """ + Returns the atomic formula. + + If ``return_dict`` is ``True``, then returns a ``dictionary`` with keys elemental symbols and values the number of occurrences. + + For instance, ``water.formula()`` would return ``{'O': 1, 'H': 2}``. + + If ``return_dict`` is ``False``, then returns a stringified version of the formula according to standard rules. + + For instance, ``water.formula()`` would return ``H2O``. + + Args: + return_dict (Bool): if the method should return a string or a dictionary + + Returns: + a dictionary or string representing the molecule's formula + """ + + formula_dict = {} + for atom in self.atomic_numbers: + symbol = get_symbol(atom) + if symbol in formula_dict: + formula_dict[symbol] += 1 + else: + formula_dict[symbol] = 1 + if return_dict == True: + return formula_dict + else: + formula = "" + elements = list(formula_dict.keys()) + + #### H and C always come first + if "C" in elements: + elements.remove("C") + formula += f"C{formula_dict['C']}" + + if "H" in elements: + elements.remove("H") + formula += f"H{formula_dict['H']}" + + for element in sorted(elements): + formula += f"{element}{formula_dict[element]}" + + return formula + + def _get_bond_fragments(self, atom1, atom2): + """ + Returns the pieces of a molecule that one would obtain by ereaking the bond between two atoms. Will throw ``ValueError`` if the atoms are in a ring. + Useful for distance/angle/dihedral scans -- one fragment can be moved and the other held constant. 
+ + Args: + atom1 (int): the number of the first atom + atom2 (int): the number of the second atom + + Returns: + fragment1: the list of atoms in fragment 1 (containing atom1) + fragment2: the list of atoms in fragment 2 (containing atom2) + + """ + + self._check_atom_number(atom1) + self._check_atom_number(atom2) + + assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" + + bond_order = self.get_bond_order(atom1, atom2) + if self.bonds.has_edge(atom1, atom2): + self.bonds.remove_edge(atom1, atom2) + + fragments = nx.connected_components(self.bonds) + fragment1 = [] + fragment2 = [] + + for fragment in fragments: + if atom1 in fragment: + if atom2 in fragment: + self.add_bond(atom1, atom2, bond_order) # not adding back this bond causes some pretty pernicious errors + raise ValueError(f"Atom {atom1} and atom {atom2} are in a ring or otherwise connected!") + else: + fragment1 = fragment + if atom2 in fragment: + fragment2 = fragment + + self.add_bond(atom1, atom2, bond_order) + return list(fragment1), list(fragment2) + else: + raise ValueError(f"No bond between atom {atom1} and atom {atom2}!") + + def _get_fragment_containing(self, atom): + """ + Get the fragment containing the atom with number ``atom``. + + Args: + atom (int): the number of the atom + + Returns: + a list of all the atoms in the fragment + """ + + self._check_atom_number(atom) + + fragments = nx.connected_components(self.bonds) + + for fragment in fragments: + if atom in fragment: + return list(fragment) + + def set_distance(self, atom1=None, atom2=None, distance=None, move="group", atoms=None): + """ + Adjusts the ``atom1`` -- ``atom2`` bond length to be a fixed distance by moving atom2. + + If ``move`` is set to "group", then all atoms bonded to ``atom2`` will also be moved. + + If ``move`` is set to "atom", then only atom2 will be moved. 
+ + Args: + atom1 (int): the number of the first atom + atom2 (int): the number of the second atom + distance (float): distance in Angstroms of the final bond + move (str): determines how fragment moving is handled + atoms (list): 2-element list of atom numbers + + Returns: + the Molecule object + """ + + if (atom1 is None) and (atom2 is None): + assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" + assert len(atoms) == 2, "need 2 atom numbers to set distance" + atom1 = atoms[0] + atom2 = atoms[1] + + assert isinstance(distance, (float, int, np.number)), "need distance to set distance" + + self._check_atom_number(atom1) + self._check_atom_number(atom2) + + if (not isinstance(distance, float)) or (distance < 0): + raise ValueError(f"invalid value {distance} for distance!") + + atoms_to_move = [] + if move == "group": + if self.get_bond_order(atom1, atom2): + _, atoms_to_move = self._get_bond_fragments(atom1, atom2) + else: + atoms_to_move = self._get_fragment_containing(atom2) + elif move == "atom": + atoms_to_move = [atom2] + else: + raise ValueError(f"Invalid option {move} for parameter 'move'!") + + if (atom1 in atoms_to_move and atom2 in atoms_to_move) and move == "group": + raise ValueError('both our atoms are connected which will preclude any movement with ``move`` set to "group"') + + current_distance = self.get_distance(atom1, atom2) + + v1 = self.get_vector(atom1) + v2 = self.get_vector(atom2) + vb = v2 - v1 + + if np.linalg.norm(vb) - current_distance > 0.00001: + raise ValueError(f"Error calculating bond distance!") + + #### move all the atoms + delta = distance - current_distance + unitv = compute_unit_vector(vb) + for atom in atoms_to_move: + self.geometry[atom] = self.geometry[atom] + (delta * unitv) + + #### check everything worked okay... 
+ v1f = self.get_vector(atom1) + v2f = self.get_vector(atom2) + vbf = v2f - v1f + + if np.linalg.norm(vbf) - distance > 0.001: + new_dist = np.linalg.norm(vbf) + raise ValueError(f"Error moving bonds -- new distance is {new_dist:.3f}. Operation failed!") + + return self + + def set_angle(self, atom1=None, atom2=None, atom3=None, angle=None, move="group", atoms=None): + """ + Adjusts the ``atom1`` -- ``atom2`` -- ``atom3`` bond angle to be a fixed value by moving ``atom3``. + + If `move` is set to "group", then all atoms bonded to ``atom3`` will also be moved. + + If `move` is set to "atom", then only ``atom3`` will be moved. + + Args: + atom1 (int): the number of the first atom + atom2 (int): the number of the second atom + atom3 (int): the number of the third atom + angle (float): final value in degrees of the ``atom1`` -- ``atom2`` -- ``atom3`` angle + move (str): determines how fragment moving is handled + atoms (list): 3-element list of atom numbers + + Returns: + the Molecule object + """ + + if (atom1 is None) and (atom2 is None) and (atom3 is None) : + assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" 
+ assert len(atoms) == 3, "need 3 atom numbers to set angle" + atom1 = atoms[0] + atom2 = atoms[1] + atom3 = atoms[2] + + assert isinstance(angle, (float, int, np.number)), "need angle to set angle" + + self._check_atom_number(atom1) + self._check_atom_number(atom2) + self._check_atom_number(atom3) + + if self.get_distance(atom1, atom2) < 0.01: + raise ValueError(f"atom {atom1} and atom {atom2} are too close!") + + if self.get_distance(atom2, atom3) < 0.01: + raise ValueError(f"atom {atom2} and atom {atom3} are too close!") + + if self.get_distance(atom1, atom3) < 0.01: + raise ValueError(f"atom {atom1} and atom {atom3} are too close!") + + try: + angle = float(angle) + except Exception as e: + raise TypeError(f"angle {angle} cannot be converted to float!") + + if (not isinstance(angle, float)) or ((angle < 0) or (angle > 360)): + raise ValueError(f"invalid value {angle} for angle!") + + atoms_to_move = [] + if move == "group": + if self.get_bond_order(atom2, atom3): + _, atoms_to_move = self._get_bond_fragments(atom2, atom3) + elif self.are_connected(atom2, atom3): + raise ValueError( + f"atom {atom2} and atom {atom3} are connected but not bonded -- cannot adjust angle! try manually removing one or more bonds." + ) + else: + atoms_to_move = self._get_fragment_containing(atom3) + elif move == "atom": + atoms_to_move = [atom3] + else: + raise ValueError(f"Invalid option {move} for parameter 'move'!") + + if atom1 in atoms_to_move: + raise ValueError( + f"atom {atom1} and atom {atom3} are connected in multiple ways -- cannot adjust angle! try manually removing one or more bonds." + ) + + current_angle = self.get_angle(atom1, atom2, atom3) + delta = angle - current_angle + + if np.abs(delta) < 0.001: + return + + #### now the real work begins... 
+ + #### move everything to place atom2 at the origin + v2 = self.get_vector(atom2) + self.translate_molecule(-v2) + + v1 = self.get_vector(atom1) + v3 = self.get_vector(atom3) + + #### perform the actual rotation + rot_axis = np.cross(v1, v3) + rot_matrix = compute_rotation_matrix(rot_axis, delta) + for atom in atoms_to_move: + self.geometry[atom] = np.dot(rot_matrix, self.get_vector(atom)) + + #### and move it back! + self.translate_molecule(v2) + + final_angle = self.get_angle(atom1, atom2, atom3) + + #### need to compare cosines to prevent insidious phase difficulties (like 0.00 and 359.99) + if np.abs(math.cos(math.radians(final_angle)) - math.cos(math.radians(angle))) > 0.001: + raise ValueError(f"Error rotating atoms -- expected angle {angle}, got {final_angle} -- operation failed!") + + return self + + def set_dihedral(self, atom1=None, atom2=None, atom3=None, atom4=None, dihedral=None, move="group34", check_result=True, atoms=None): + """ + Adjusts the ``atom1`` -- ``atom2`` -- ``atom3`` -- ``atom4`` dihedral angle to be a fixed value by moving atom 4. + + If ``move`` is set to "atom", then only ``atom4`` will be moved. + + If ``move`` is set to "group4", then all atoms bonded to ``atom4`` will also be moved. + + If ``move`` is set to "group34", then all atoms bonded to ``atom3`` and ``atom4`` will also be moved. 
+ + Args: + atom1 (int): the number of the first atom + atom2 (int): the number of the second atom + atom3 (int): the number of the third atom + atom4 (int): the number of the fourth atom + dihedral (float): final value in degrees of the ``atom1`` -- ``atom2`` -- ``atom3`` -- ``atom4`` angle + move (str): determines how fragment moving is handled + check_result (Bool): whether the final answer should be checked for correctness + atoms (list): 4-element list of atomic numbers + + Returns: + the Molecule object + """ + + if (atom1 is None) and (atom2 is None) and (atom3 is None) and (atom4 is None): + assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" + assert len(atoms) == 4, "need 4 atom numbers to set dihedral" + atom1 = atoms[0] + atom2 = atoms[1] + atom3 = atoms[2] + atom4 = atoms[3] + + assert isinstance(dihedral, (float, int, np.number)), "need angle to set dihedral angle" + + # check atom numbers + self._check_atom_number(atom1) + self._check_atom_number(atom2) + self._check_atom_number(atom3) + self._check_atom_number(atom4) + + # check there is bond connectivity information + assert len(self.bonds) > 0, "no bond connectivity information" + + # check for collinearity + angle = self.get_angle(atom1, atom2, atom3, check=False) + assert 0.0001 < angle < 179.9999, f"1/2/3 atoms {atom1}-{atom2}-{atom3} are collinear (angle={angle:.8f})" + angle = self.get_angle(atom2, atom3, atom4, check=False) + assert 0.0001 < angle < 179.9999, f"2/3/4 atoms {atom2}-{atom3}-{atom4} are collinear (angle={angle:.8f})" + + for x in [atom1, atom2, atom3, atom4]: + for y in [atom1, atom2, atom3, atom4]: + if x <= y: + continue + else: + if self.get_sq_distance(x, y, check=False) < 0.001: + raise ValueError(f"atom {x} and atom {y} are too close!") + + try: + dihedral = float(dihedral) + except Exception as e: + raise TypeError(f"dihedral angle {dihedral} cannot be converted to float!") + + if (not isinstance(dihedral, float)) or ((dihedral 
< 0) or (dihedral > 360)): + raise ValueError(f"invalid value {dihedral} for dihedral angle!") + + atoms_to_move = [] + if move == "group34": + #### add atom3's fragment to atom4 + if self.get_bond_order(atom2, atom3): + _, atoms_to_move = self._get_bond_fragments(atom2, atom3) + elif self.are_connected(atom2, atom3): + raise ValueError( + f"atom {atom2} and atom {atom3} are connected but not bonded -- cannot adjust dihedral angle! try manually removing one or more bonds." + ) + else: + atoms_to_move = self._get_fragment_containing(atom3) + + #### and make sure atom4 is in there too! + if atom4 not in atoms_to_move: + atoms_to_move += self._get_fragment_containing(atom4) + elif move == "group4": + if self.get_bond_order(atom3, atom4): + _, atoms_to_move = self._get_bond_fragments(atom3, atom4) + elif self.are_connected(atom3, atom4): + raise ValueError( + f"atom {atom3} and atom {atom4} are connected but not bonded -- cannot adjust dihedral angle! try manually removing one or more bonds." + ) + else: + atoms_to_move = self._get_fragment_containing(atom4) + elif move == "atom": + atoms_to_move = [atom4] + else: + raise ValueError(f"Invalid option {move} for parameter 'move'!") + + if atom1 in atoms_to_move: + raise ValueError( + f"atom {atom1} and atom {atom4} are connected in multiple ways -- cannot adjust dihedral angle! try manually removing one or more bonds." + ) + + if atom2 in atoms_to_move: + raise ValueError( + f"atom {atom2} and atom {atom4} are connected in multiple ways -- cannot adjust dihedral angle! try manually removing one or more bonds." + ) + + if atom4 not in atoms_to_move: + raise ValueError(f"atom {atom4} is not going to be moved... this operation is doomed to fail!") + + current_dihedral = self.get_dihedral(atom1, atom2, atom3, atom4, check=False) + delta = (dihedral - current_dihedral) % 360 + + if np.abs(delta) < 0.001: + return self + + #### now the real work begins... 
+ #### move everything to place atom2 at the origin + v3 = self.get_vector(atom3, check=False) + self.translate_molecule(-v3) + + #### perform the actual rotation + rot_matrix = compute_rotation_matrix(-self.get_vector(atom2, check=False), delta) + + for atom in atoms_to_move: + self.geometry[atom] = np.dot(rot_matrix, self.get_vector(atom, check=False)) + + #### and move it back! + self.translate_molecule(v3) + + if check_result: + final_dihedral = self.get_dihedral(atom1, atom2, atom3, atom4, check=False) + + #### need to compare cosines to prevent insidious phase difficulties (like 0.00 and 359.99) + #### this will throw ValueError for differences of about 2 degrees + if np.abs(math.cos(math.radians(final_dihedral)) - math.cos(math.radians(dihedral))) > 0.001: + raise ValueError(f"Error rotating atoms -- expected dihedral angle {dihedral}, got {final_dihedral} -- operation failed!") + + return self + + def translate_molecule(self, vector): + """ + Translates the whole molecule by the given vector. + + Args: + vector (vector): the vector to translate by + + Returns: + the Molecule object + """ +# for atom in range(1, self.num_atoms() + 1): +# self.geometry[atom] = self.geometry[atom] + vector + + self.geometry += vector + + return self + + def rotate_molecule(self, axis, degrees): + """ + Rotates the whole molecule around the given axis. + + Args: + axis (vector): the vector to rotate about + theta (float): how much to rotate (in degrees) + + Returns: + the Molecule object + """ + rot_matrix = compute_rotation_matrix(axis, degrees) + + for atom in range(1, self.num_atoms() + 1): + self.geometry[atom] = np.dot(rot_matrix, self.geometry[atom]) + + return self + + def calculate_mass_spectrum(self, **kwargs): + """ + Generates list of m/z values. + + Final weights rounded to one decimal point (because of low-res MS). 
+ """ + form_vec = np.zeros(shape=92, dtype=np.int8) + for z in self.atomic_numbers: + form_vec[z] += 1 + + masses, weights = _recurse_through_formula(form_vec, [0], [1], **kwargs) + + new_masses, indices = np.unique(np.round(masses, decimals=1), return_inverse=True) + new_weights = np.zeros_like(new_masses) + for k in range(len(new_weights)): + new_weights[k] = np.sum(weights[np.nonzero(indices == k)]) + new_weights = new_weights / np.max(new_weights) + + return new_masses, new_weights + + def add_atom_at_centroid(self, symbol, atom_numbers, weighted=False): + """ + Adds atom with symbol ``symbol`` at the centroid of the atoms in ``atom_numbers``. + + If ``weighted`` is ``True``, then the centroid calculation will take into account the atomic numbers of the atoms in question (placing the atom closer to more massive atoms). + + Otherwise, the average is unweighted. + + Args: + symbol (str): the atomic symbol of the atom to be added + atom_numbers (list): which atoms to put the new atom between + weighted (Bool): if the centroid calculation should be weighted (see above) + + Returns: + the Molecule object + """ + + if (not isinstance(atom_numbers, list)) or (len(atom_numbers) < 2): + raise TypeError("atom_numbers must be list with at least two elements") + + if not isinstance(symbol, str): + raise TypeError(f"symbol {symbol} must be a string!") + + coords = [None] * len(atom_numbers) + weights = [1] * len(atom_numbers) + for index, atom in enumerate(atom_numbers): + self._check_atom_number(atom) + coords[index] = self.get_vector(atom) + if weighted == True: + weights[index] = self.atomic_numbers[atom] + + new_coord = list(np.average(coords, weights=weights, axis=0)) + return self.add_atom(coordinates=new_coord, symbol=symbol) + + def add_atom(self, symbol, coordinates): + """ + Add an atom with symbol ``symbol`` at position ``coordinates``. + + Args: + symbol (str): symbol of the atom (e.g. 
"Cl", "Ar", "C") + coordinates (list): the coordinates to add + + Returns: + the Molecule object + """ + + if (not isinstance(coordinates, (list, np.ndarray)) or (len(coordinates) != 3)): + raise TypeError("coordinates must be list with three elements") + + if not isinstance(symbol, str): + raise TypeError(f"symbol {symbol} must be a string!") + + number = get_number(symbol) + self.atomic_numbers = np.append(self.atomic_numbers, [number]).astype(np.int8).view(cctk.OneIndexedArray) + self.geometry = np.append(self.geometry, [coordinates], axis=0).view(cctk.OneIndexedArray) + self.bonds.add_node(self.num_atoms()) + + return self + + def remove_atom(self, number): + """ + Remove the atom with number ``number``. + + Args: + number (int): number of the atom + + Returns: + the Molecule object + """ + + self._check_atom_number(number) + + try: + self.bonds.remove_node(number) + self.geometry = np.delete(self.geometry, number - 1, axis=0).view(cctk.OneIndexedArray) + self.atomic_numbers = np.delete(self.atomic_numbers, number - 1).view(cctk.OneIndexedArray) + + #### need to renumber to fill gaps + self.bonds = nx.convert_node_labels_to_integers(self.bonds, first_label=1, ordering="sorted") + + return self + except Exception as e: + raise ValueError("removing atom {number} failed!") + + def get_atomic_number(self, atom): + """ + Get the atomic number for a given atom. + + Args: + atom (int): number of the first atom + + Returns: + atomic_number (int): the atomic number of that atom + """ + self._check_atom_number(atom) + return self.atomic_numbers[atom] + + def get_atomic_symbol(self, atom): + """ + Get the atomic symbol for a given atom. + + Args: + atom (int): number of the first atom + + Returns: + atomic_symbol (str): the atomic symbol of that atom + """ + atomic_number = self.get_atomic_number(atom) + return get_symbol(atomic_number) + + def get_atomic_symbols(self): + """ + Get a list of atomic symbols for this Molecule. 
+ + Returns: + atomic_symbols (cctk.OneIndexedArray): the atomic symbols + """ + n_atoms = self.get_n_atoms() + l = [ self.get_atomic_symbol(i) for i in range(1,n_atoms+1) ] + return cctk.OneIndexedArray(l) + + def get_n_atoms(self): + """ + Determine how many atoms are in this Molecule. + + Returns + n_atoms (int): the number of atoms + """ + return len(self.atomic_numbers) + + def get_vector(self, atom, atom2=None, check=True): + """ + Get the geometry vector for a given atom. If two atoms are specified, gives the vector connecting them (from ``atom2`` to ``atom``). + ``mol.get_vector(atom)`` is thus equivalent to ``mol.get_vector(atom, origin)``. + + Args: + atom1 (int): number of the first atom + atom2 (int): number of the second atom (optional) + check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) + + Returns: + a Numpy array + """ + if check: + self._check_atom_number(atom) + + if atom2: + if check: + self._check_atom_number(atom2) + return (self.geometry[atom] - self.geometry[atom2]).view(np.ndarray) + else: + return self.geometry[atom].view(np.ndarray) + + def get_distance(self, atom1=None, atom2=None, check=True, _dist=compute_distance_between, atoms=None): + """ + Wrapper to compute distance between two atoms. + + This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_dist``). + + Args: + atom1 (int): number of the first atom + atom2 (int): number of the second atom + check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) + _dist (function): function usd to compute distance + atoms (list): list of atomic numbers + + Returns: + the distance, in Angstroms + """ + if (atom1 is None) and (atom2 is None): + assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" 
+ assert len(atoms) == 2, "need 2 atom numbers to get distance" + atom1 = atoms[0] + atom2 = atoms[1] + + if check: + try: + atom1 = int(atom1) + atom2 = int(atom2) + except Exception as e: + raise TypeError("atom numbers cannot be cast to int!") + + self._check_atom_number(atom1) + self._check_atom_number(atom2) + + return _dist(self.get_vector(atom1, check=False), self.get_vector(atom2, check=False)) + + def get_sq_distance(self, atom1, atom2, check=True): + """ + Wrapper to compute squared distance between two atoms -- optimized for speed! + + Args: + atom1 (int): number of the first atom + atom2 (int): number of the second atom + check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) + + Returns: + the squared distance + """ + if check: + try: + atom1 = int(atom1) + atom2 = int(atom2) + except Exception as e: + raise TypeError("atom numbers cannot be cast to int!") + + self._check_atom_number(atom1) + self._check_atom_number(atom2) + + return np.sum(np.square(self.get_vector(atom1, atom2, check=False))) + + def get_angle(self, atom1=None, atom2=None, atom3=None, check=True, _angle=compute_angle_between, atoms=None): + """ + Wrapper to compute angle between three atoms. + + This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_angle``). + + Args: + atom1 (int): number of the first atom + atom2 (int): number of the second atom + atom3 (int): number of the third atom + check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) + _angle (function): function usd to compute angle + atoms (list): list of atom numbers + + Returns: + the angle, in degrees + """ + if (atom1 is None) and (atom2 is None) and (atom3 is None): + assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" 
+ assert len(atoms) == 3, "need 3 atom numbers to get angle" + atom1 = atoms[0] + atom2 = atoms[1] + atom3 = atoms[2] + + if check: + try: + atom1 = int(atom1) + atom2 = int(atom2) + atom3 = int(atom3) + except Exception as e: + raise TypeError("atom numbers cannot be cast to int!") + + self._check_atom_number(atom1) + self._check_atom_number(atom2) + self._check_atom_number(atom3) + + v1 = self.get_vector(atom1, check=False) + v2 = self.get_vector(atom2, check=False) + v3 = self.get_vector(atom3, check=False) + + return _angle(v1 - v2, v3 - v2) + + def get_dihedral(self, atom1=None, atom2=None, atom3=None, atom4=None, check=True, _dihedral=compute_dihedral_between, atoms=None): + """ + Wrapper to compute dihedral angle between four atoms. + + This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_dihedral``). + + Args: + atom1 (int): number of the first atom + atom2 (int): number of the second atom + atom3 (int): number of the third atom + atom4 (int): number of the fourth atom + check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) + _dihedral (function): function used to compute dihedral + atoms (list): list of atom numbers + + Returns: + the dihedral angle, in degrees + """ + if (atom1 is None) and (atom2 is None) and (atom3 is None) and (atom4 is None): + assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" 
+ assert len(atoms) == 4, "need 4 atom numbers to get dihedral angle" + atom1 = atoms[0] + atom2 = atoms[1] + atom3 = atoms[2] + atom4 = atoms[3] + + if check: + try: + atom1 = int(atom1) + atom2 = int(atom2) + atom3 = int(atom3) + atom4 = int(atom4) + except Exception as e: + raise TypeError("atom numbers cannot be cast to int!") + + self._check_atom_number(atom1) + self._check_atom_number(atom2) + self._check_atom_number(atom3) + self._check_atom_number(atom4) + + return _dihedral( + self.get_vector(atom1, check=False), + self.get_vector(atom2, check=False), + self.get_vector(atom3, check=False), + self.get_vector(atom4, check=False), + ) + + def get_bond_order(self, atom1, atom2): + """ + Wrapper to get bond order between two atoms. + + Args: + atom1 (int): number of the first atom + atom2 (int): number of the second atom + + Returns: + the bond order + """ + self._check_atom_number(atom1) + self._check_atom_number(atom2) + + if self.bonds.has_edge(atom1, atom2): + return self.bonds[atom1][atom2]["weight"] + else: + return 0 + + def are_connected(self, atom1, atom2): + """ + Wrapper to tell if two atoms are connected. + """ + self._check_atom_number(atom1) + self._check_atom_number(atom2) + + if atom1 in self._get_fragment_containing(atom2): + return True + else: + return False + + def get_atoms_by_symbol(self, symbol): + """ + Returns all the numbers of atoms of type ``symbol`` in the molecule. + """ + if not isinstance(symbol, str): + raise TypeError("symbol {symbol} must be a string") + + number = get_number(symbol) + atoms = [] + + for index, atom in enumerate(self.atomic_numbers, start=1): + if atom == number: + atoms.append(index) + + return atoms + + def get_heavy_atoms(self): + """ + Returns a list of all the heavy atoms in the molecule (i.e., not hydrogen), for array indexing. 
+ """ + atoms = [] + + for index, atom in enumerate(self.atomic_numbers, start=1): + if atom != 1: + atoms.append(index) + + return atoms + + def get_adjacent_atoms(self, atom): + """ + Returns a list of the neighbors of ``atom``. If ``atom`` has no neighbors, an empty list will be returned. + """ + try: + atom = int(atom) + except Exception as e: + raise TypeError(f"atom number {atom} cannot be cast to int!") + + self._check_atom_number(atom) + + return list(self.bonds.neighbors(atom)) + + def num_atoms(self): + return len(self.atomic_numbers) + + def rms_distance_between_atoms(self): + """ + Returns the RMS distance (in Angstroms) between every pair of atoms - a quick, easy-to-calculate proxy for minimizing steric clashes. + """ + distance = 0 + for i in range(1, self.num_atoms() + 1): + for j in range(1, self.num_atoms() + 1): + if i == j: + continue + distance += self.get_distance(i, j) ** 2 + + return math.sqrt(distance) / self.num_atoms() + + def optimize_dihedral(self, atom1, atom2, atom3, atom4, step=10): + """ + Minimizes the value of ``self.rms_distance_between_atoms`` for the given dihedral, in one-degree increments. + A cheap alternative to geometry optimization using *ab initio* methods or density functional theory. 
+ + Args: + atom1 (int): atom number of the first atom in the dihedral + atom2 (int): atom number of the second atom in the dihedral + atom3 (int): atom number of the third atom in the dihedral + atom4 (int): atom number of the fourth atom in the dihedral + step (float): explore angles from 0 to 360 with this stepsize in degrees + + Returns: + the final value of the angle + """ + self._check_atom_number(atom1) + self._check_atom_number(atom2) + self._check_atom_number(atom3) + self._check_atom_number(atom4) + + best_angle = 0 + best_dist = 0 + + for angle in range(0, 360, step): + self.set_dihedral(atom1, atom2, atom3, atom4, angle) + if self.rms_distance_between_atoms() > best_dist: + best_dist = self.rms_distance_between_atoms() + best_angle = angle + + self.set_dihedral(atom1, atom2, atom3, atom4, best_angle) + return best_angle + + def atom_string(self, atom): + """ + Returns the elemental symbol and the atom number for a given atom. + + For example, ``methane.atom_string(1)`` might return "C1". + + Args: + atom (int): number of the atom + + Returns: + the aforementioned atom string + """ + try: + atom = int(atom) + except Exception as e: + raise ValueError("atom cannot be cast to int") + + self._check_atom_number(atom) + + return f"{get_symbol(self.atomic_numbers[atom])}{atom}" + + def perturb(self, size=0.005): + """ + This function can be used to generate a slightly different molecule in cases where numerical (or geometric) converge is problematic. + + It adds a random variable (sampled from a normal distribution, centered at 0 with stddev ``size`) to every number in ``self.geometry``. + + Args: + size (float): stddev of the normal distribution + + Returns: + the Molecule object + """ + geometry = self.geometry + random = np.random.normal(scale=size, size=geometry.shape) + + self.geometry = geometry + random + return self + + def center(self): + """ + Moves the centroid to the origin. 
+ """ + atoms = np.arange(1, self.num_atoms()+1) + self.translate_molecule(-self.geometry[atoms].mean(axis=0)) + return self + + @classmethod + def combine_molecules(cls, molecule1, molecule2): + """ + Combine two molecules into one final molecule. + + Bonding information is not currently preserved. + + Args: + molecule1 (Molecule): 1st molecule + molecule2 (Molecule): 2nd molecule + + Returns: + new ``Molecule`` object + """ + + atoms = np.hstack((molecule1.atomic_numbers.T, molecule2.atomic_numbers.T)).view(cctk.OneIndexedArray) + geoms = np.vstack((molecule1.geometry, molecule2.geometry)).view(cctk.OneIndexedArray) + charge = molecule1.charge + molecule2.charge + + s1 = (molecule1.multiplicity - 1) / 2 + s2 = (molecule2.multiplicity - 1) / 2 + multiplicity = (s1+s2) * 2 + 1 + + return Molecule(atoms, geoms, charge=charge, multiplicity=multiplicity) + + def volume(self, pts_per_angstrom=10, qhull=False): + """ + Returns volume calculated using the Gavezotti algorithm (JACS, 1983, 105, 5220). Relatively slow. + If MemoryError, defaults to a qhull-based approach (accurate in the limit as number of atoms goes to infinity) + + Args: + pts_per_angstrom (int): how many grid points to use per Å - time scales as O(n**3) so be careful! + qhull (bool): use faster QHull algorithm + + Returns: + volume in Å**3 + """ + if not qhull: + try: + assert isinstance(pts_per_angstrom, int), "Need an integer number of pts per Å!" + assert pts_per_angstrom > 0, "Need a positive integer of pts per Å!" 
+ + box_max = np.max(self.geometry.view(np.ndarray), axis=0) + 4 + box_min = np.min(self.geometry.view(np.ndarray), axis=0) - 4 + + box_volume = (box_max[0] - box_min[0]) * (box_max[1] - box_min[1]) * (box_max[2] - box_min[2]) + + x_vals = np.linspace(box_min[0], box_max[0], int((box_max[0] - box_min[0]) * pts_per_angstrom)) + y_vals = np.linspace(box_min[1], box_max[1], int((box_max[1] - box_min[1]) * pts_per_angstrom)) + z_vals = np.linspace(box_min[2], box_max[2], int((box_max[2] - box_min[2]) * pts_per_angstrom)) + + # h4ck3r + box_pts = np.stack([np.ravel(a) for a in np.meshgrid(x_vals, y_vals, z_vals)], axis=-1) + + # caching to speed call + vdw_radii = {z: get_vdw_radius(z) for z in set(self.atomic_numbers)} + radii_per_atom = np.array([vdw_radii[z] for z in self.atomic_numbers]).reshape(-1,1) + + # this is the slow part since it's approximately a zillion operations + dists_per_atom = cdist(self.geometry.view(np.ndarray), box_pts) + occupied = np.sum(np.max(dists_per_atom < radii_per_atom, axis=0)) + + percent_occupied = occupied / box_pts.shape[0] + return percent_occupied * box_volume + except MemoryError: + qhull = True + + if qhull: + import scipy + hull = scipy.spatial.ConvexHull(self.geometry.view(np.ndarray)) + return hull.volume + + def swap_atom_numbers(self, atom1, atom2): + """ + Interchanges the numbers of ``atom1`` and ``atom2``. 
+ + Args: + atom1 (int): number of 1st atom + atom2 (int): number of 2nd atom + + Returns + new ``Molecule`` object (does not modify in-place) + """ + self._check_atom_number(atom1) + self._check_atom_number(atom2) + mol = copy.deepcopy(self) + + z1 = mol.atomic_numbers[atom1] + z2 = mol.atomic_numbers[atom2] + g1 = copy.deepcopy(mol.geometry[atom1]) + g2 = copy.deepcopy(mol.geometry[atom2]) + + mol.atomic_numbers[atom2] = z1 + mol.atomic_numbers[atom1] = z2 + mol.geometry[atom2] = g1 + mol.geometry[atom1] = g2 + + mapping = {atom2: atom1, atom1: atom2} + mol.bonds = nx.relabel_nodes(mol.bonds, mapping, copy=True) + return mol + + def epimerize(self, center_atom, substituent1, substituent2): + """ + Epimerizes ``center_atom`` by exchanging the groups corresponding to ``substituent1`` and ``substituent2``. + Both substituents must be bonded to the center atom! + + Args: + center_atom (int): number of middle atom + substituent1 (int): number of 1st atom + substituent1 (int): number of 2nd atom + + Returns + new ``Molecule`` object (does not modify in-place) + """ + + self._check_atom_number(center_atom) + self._check_atom_number(substituent1) + self._check_atom_number(substituent2) + + assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" + + adj = self.get_adjacent_atoms(center_atom) + assert len(adj) == 4, "center atom must be making 4 bonds!" + assert substituent1 in adj, "1st substituent is not bonded to center atom!" + assert substituent2 in adj, "2nd substituent is not bonded to center atom!" 
def renumber_to_match(self, model, check_chirality="all"):
    """
    Renumbers atoms to match ``model`` (must have isomorphic bond graph). Returns a copy of ``self`` with renumbered atoms.

    Both ``self.bonds`` and ``model.bonds`` get an ``atomic_number`` node attribute written to them
    as a side effect, since the graph matcher compares nodes on that attribute.

    Args:
        model (cctk.Molecule): isomorphic molecule to renumber by
        check_chirality (list of atom numbers): centers to check, to prevent inversion due to graph isomorphism.
            Alternatively ``None`` will prevent any checking and "all" will use ``cctk.topology.get_exchangeable_centers()``.

    Returns:
        new ``Molecule`` object

    Raises:
        AssertionError: if ``self`` has no bonds or the two bond graphs are not isomorphic
        ValueError: if no candidate numbering reproduces the chirality report of ``model``
    """

    assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"

    #### use networkx to generate mapping
    #### you need the node matcher to distinguish between e.g. H, F, Cl
    self._add_atomic_numbers_to_nodes()
    model._add_atomic_numbers_to_nodes()
    nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)

    match = nx.algorithms.isomorphism.GraphMatcher(model.bonds, self.bonds, node_match=nm)
    assert match.is_isomorphic(), "can't renumber non-isomorphic graphs!"
    new_ordering = [match.mapping[x] for x in range(1, self.num_atoms() + 1)]
    inv_mapping = {v: k for k, v in match.mapping.items()}  # bit kludgy but works

    #### create renumbered molecule
    mol = copy.deepcopy(self)
    mol.atomic_numbers = self.atomic_numbers[new_ordering]
    mol.geometry = self.geometry[new_ordering]
    mol.bonds = nx.relabel_nodes(self.bonds, mapping=inv_mapping, copy=True)

    #### ``None`` is documented as "no checking": hand back the graph-matched numbering as-is.
    #### (previously this path never returned the renumbered molecule.)
    if check_chirality is None:
        return mol

    if check_chirality == "all":
        check_chirality = top.get_exchangeable_centers(mol)

    #### diastereotopic protons get scrambled by the above code so we gotta go through and fix all of them
    #### this happens because networkx doesn't store chirality - a known limitation of graph molecular encoding!
    if isinstance(check_chirality, list):
        #### find all the differences and exchange them
        model_report = top.get_chirality_report(model, check_chirality)

        #### generate all meso ring permutations
        candidates = top.flip_meso_rings(mol, atoms=check_chirality)

        #### for each, try flipping configuration of all centers
        for candidate in candidates:
            report = top.get_chirality_report(candidate, check_chirality)
            for center in check_chirality:
                if model_report[center] != report[center]:
                    try:
                        candidate = top.exchange_identical_substituents(candidate, center)
                    except ValueError:
                        #### this center can't be exchanged; the re-check below will reject the candidate
                        break

            #### check that we actually fixed all the problems
            mol_report = top.get_chirality_report(candidate, check_chirality)
            all_good = all(mol_report[center] == model_report[center] for center in check_chirality)

            #### if we did, then return
            if all_good:
                return candidate

    raise ValueError("can't get a proper renumbering: are you *sure* these two molecules can have the same chirality?")
def limit_solvent_shell(self, solute=0, num_atoms=0, num_solvents=10, distance_from_atom=None, return_idxs=False):
    """
    Automatically detects solvent molecules and removes them until you have a set number of solvents or atoms.

    The "distance" between molecules is the minimum of the pairwise atomic distances, to emphasize inner-sphere interactions.
    Fragments are removed farthest-first. Removal stops as soon as either limit is met, so a system that is
    already within the limits is returned unchanged, and the solute fragment itself is never removed.

    Args:
        solute (int): which fragment is the solute, 0-indexed
        num_atoms (int): remove atoms until there are this number (modulo the size of a solvent molecule)
        num_solvents (int): remove solvent molecules until there are this number
        distance_from_atom (int): if you want to find molecules closest to a given atom in the solute, specify the atom number here.
            if this atom is not in the solute fragment, an exception will be raised.
        return_idxs (bool): if True, indices of atoms that would be in the new molecule are returned. no change is made to ``self``.

    Returns:
        new ``Molecule`` object, or a sorted list of the retained atom numbers if ``return_idxs`` is True
    """
    assert isinstance(num_atoms, int)
    assert isinstance(num_solvents, int)

    fragments = self.get_components()
    solute_x = self.geometry[fragments[solute]].view(np.ndarray)

    if distance_from_atom:
        assert distance_from_atom in fragments[solute], f"{distance_from_atom} is not in the solute fragment"
        solute_x = self.geometry[[distance_from_atom]].view(np.ndarray)

    #### the solute keeps distance 0, so it sorts last in the farthest-first order below
    distances = np.zeros(shape=len(fragments))
    for i, f in enumerate(fragments):
        if i != solute:
            solvent_x = self.geometry[f].view(np.ndarray)
            # cdist is absurdly fast
            distances[i] = np.min(cdist(solvent_x, solute_x))

    current_num_solvents = len(fragments) - 1
    current_num_atoms = self.num_atoms()

    #### reverse order - farthest away comes first
    to_remove = []
    for i in np.argsort(distances)[::-1]:
        #### test the limits *before* removing anything, and with <= so an already-small system stops immediately
        if current_num_atoms <= num_atoms or current_num_solvents <= num_solvents:
            break
        if i == solute:
            continue
        to_remove.extend(fragments[i])
        current_num_atoms -= len(fragments[i])
        current_num_solvents -= 1

    if return_idxs:
        #### atom numbers are 1-indexed, so the range has to run through num_atoms() inclusive
        all_idxs = set(range(1, self.num_atoms() + 1))
        return sorted(all_idxs - set(to_remove))

    mol = copy.deepcopy(self)
    #### have to remove in reverse direction for indexing consistency
    for j in sorted(to_remove, reverse=True):
        mol.remove_atom(j)
    return mol
@classmethod
def new_from_name(cls, name):
    """
    Create a new ``Molecule`` instance from a chemical name.

    The name is sent to the NCI CACTUS structure resolver, and the SMILES string it returns
    is passed to ``cls.new_from_smiles()``. Requires network access.

    Args:
        name (str): chemical name or other identifier understood by the resolver

    Returns:
        new ``Molecule`` object

    Raises:
        ValueError: if the lookup or the subsequent structure generation fails
    """
    assert isinstance(name, str)
    from urllib.parse import quote
    from urllib.request import urlopen

    #### percent-encode the whole identifier, not just spaces (e.g. "#" would otherwise start a URL fragment).
    #### built outside the ``try`` so ``url`` is always bound when the error message is formatted.
    url = 'http://cactus.nci.nih.gov/chemical/structure/' + quote(name) + '/smiles'

    try:
        #### context manager closes the HTTP response even if decoding fails
        with urlopen(url, timeout=5) as response:
            smiles = response.read().decode('utf8')
        return cls.new_from_smiles(smiles)
    except Exception as e:
        raise ValueError(f"something went wrong auto-generating molecule {name}:\nurl: {url}\n{e}") from e
def atomic_symbols(self):
    """
    Return the atomic symbol of every atom, in the order of ``self.atomic_numbers``.

    Each distinct atomic number is passed to ``get_symbol`` only once.
    """
    lookup = {}
    for z in set(self.atomic_numbers):
        lookup[z] = get_symbol(z)

    labels = []
    for z in self.atomic_numbers:
        labels.append(lookup[z])
    return labels
def atoms_moving_in_imaginary(self, max_num=5, percent_cutoff=0.03, return_string=False):
    """
    Returns atoms moving in imaginary, ranked by how much they're moving.

    The mode used is the one with the most negative frequency in ``self.vibrational_modes``.
    Each atom's share of the motion is the norm of its displacement divided by the sum of all norms.

    Args:
        max_num (int): how many atoms max to return
        percent_cutoff (float): threshold for what percent of total TS movement qualifies as "movement"
        return_string (bool): whether or not to return a formatted string report

    Returns:
        list of atom numbers (largest displacement first), or the formatted string.
        If no mode has a negative frequency: ``None``, or ``""`` when ``return_string`` is True.
    """
    #### pick the most negative frequency
    lowest = 0
    ts_mode = None
    for mode in self.vibrational_modes:
        if mode.frequency < lowest:
            lowest = mode.frequency
            ts_mode = mode

    if ts_mode is None:
        return "" if return_string else None

    displacements = np.linalg.norm(ts_mode.displacements.view(np.ndarray), axis=-1)
    fractions = displacements / np.sum(displacements)

    return_list, pieces = [], []
    for idx in np.argsort(displacements)[::-1]:
        moving_enough = fractions[idx] > percent_cutoff
        #### ``>=`` so that at most ``max_num`` atoms are reported (``<=`` used to let one extra through)
        if not moving_enough or len(return_list) >= max_num:
            break
        atom = int(idx) + 1  # plain array position -> 1-indexed atom number
        return_list.append(atom)
        pieces.append(f"{self.atom_string(atom)} ({fractions[idx]:.1%})")

    #### single exit point: the result is returned even when every atom qualified
    #### (the old loop only returned from its ``else`` branch and otherwise fell through to ``None``)
    return ", ".join(pieces) if return_string else return_list
@classmethod
def from_string(cls, string, check_version=True):
    """
    Rebuild a ``cctk.Molecule`` from a YAML string.

    The string is parsed with ``yaml.safe_load``. The ``atomic_numbers``, ``geometry`` and ``bonds``
    entries are decoded with ``bytes_to_numpy``, and ``charge``, ``multiplicity`` and ``name`` are
    passed through unchanged.

    Arguments:
        string (str): stringified version of the molecule
        check_version (bool): whether version consistency should be enforced

    Raises:
        ValueError: on any failure, including a version mismatch when ``check_version`` is True
    """
    try:
        payload = yaml.safe_load(string)

        if check_version:
            running = pkg_resources.get_distribution("cctk").version
            assert running == payload["cctk_version"], f"Warning: the data was saved in cctk {payload['cctk_version']} but is being loaded in cctk {running}!"

        #### decode the stored arrays
        numbers = bytes_to_numpy(payload["atomic_numbers"]).astype(np.int8)
        coords = bytes_to_numpy(payload["geometry"]).astype(np.float32)
        adjacency = bytes_to_numpy(payload["bonds"])
        graph = nx.convert_matrix.from_numpy_array(adjacency)

        return cls(
            numbers,
            coords,
            bonds=graph,
            charge=payload["charge"],
            multiplicity=payload["multiplicity"],
            name=payload["name"],
            checks=False,  # trust nx data implicitly
        )

    except Exception as e:
        raise ValueError(f"this stringified Molecule fails import: {e}")
def installed(command):
    """
    Report whether ``command`` looks like it is available on this machine.

    Args:
        command (str): name of the executable, e.g. ``"xtb"``

    Returns:
        True if ``shutil.which`` finds the command, or if its name appears anywhere in the ``PATH``
        environment variable (e.g. as part of an install directory name); False otherwise.
    """
    if shutil.which(command) is not None:
        return True

    #### fallback: plain substring test against PATH.
    #### the name is matched literally (not as a regex), so names such as "c++" cannot raise ``re.error``,
    #### and a missing PATH variable is treated as empty instead of raising ``KeyError``.
    return command in os.environ.get("PATH", "")
def get_energy(molecule, method=Methods.GFN2_XTB, nprocs=1):
    """
    Compute the energy of ``molecule`` with the requested method, without optimizing it.

    Args:
        molecule (cctk.Molecule): structure to evaluate
        method (Methods): level of theory; only ``Methods.GFN2_XTB`` is dispatched here
        nprocs (int): number of cores to employ

    Returns:
        energy as reported by ``run_xtb`` (``None`` for any method other than ``Methods.GFN2_XTB``)
    """
    assert isinstance(molecule, cctk.Molecule), "need a valid molecule!"
    assert isinstance(method, Methods), "need a valid molecule!"

    #### guard clause: nothing to dispatch to for other methods
    if method is not Methods.GFN2_XTB:
        return None

    _, energy = run_xtb(molecule, nprocs=nprocs, return_energy=True, opt=False)
    return energy
def _do_csearch(molecule, directory, gfn=2, nprocs=1, logfile=None, noncovalent=False, constraints=None, additional_flags=None):
    """
    Run ``crest`` inside ``directory`` and read back the conformers it writes.

    The molecule is written to ``xtb-in.xyz`` in ``directory``. If constraints are given, ``crest`` is first
    run with ``--constrain`` and the main run then reads ``.xcontrol.sample``. The ensemble is read from
    ``crest_conformers.xyz`` in ``directory``.

    Args:
        molecule (cctk.Molecule): molecule of interest
        directory (str): working directory for all ``crest`` files
        gfn (int or ``ff``): level of theory, either 1, 2, or ``ff``
        nprocs (int): number of processors to use
        logfile (str): file to write ongoing ``crest`` output to, or ``None`` to capture it silently
        noncovalent (bool): whether or not to use non-covalent settings
        constraints (list): atom numbers to constrain; ``None`` or an empty list means unconstrained
        additional_flags (str): flags appended verbatim to the command line

    Returns:
        cctk.ConformationalEnsemble

    Raises:
        subprocess.CalledProcessError: if a ``crest`` invocation exits with a non-zero status
    """
    assert isinstance(molecule, cctk.Molecule), "need a valid molecule!"
    assert isinstance(nprocs, int)
    #### ``None`` is the default and is handled below, so it must be accepted here
    assert logfile is None or isinstance(logfile, str), "logfile must be a string or None!"

    assert gfn in [2, 1, "ff"], "invalid value for ``gfn``!"

    cctk.XYZFile.write_molecule_to_file(f"{directory}/xtb-in.xyz", molecule)

    nci = ""
    if noncovalent:
        nci = "-nci"

    if constraints is not None:
        assert isinstance(constraints, list)
        assert all(isinstance(n, int) for n in constraints)

    #### an empty list (the default of ``Molecule.csearch``) means "no constraints", not "constrain nothing"
    if constraints:
        command = f"crest xtb-in.xyz --constrain {','.join([str(c) for c in constraints])}"
        result = sp.run(command, stdout=sp.PIPE, stderr=sp.PIPE, cwd=directory, shell=True)
        result.check_returncode()
        command = f"crest xtb-in.xyz --gfn{gfn} --chrg {molecule.charge} -cinp .xcontrol.sample --uhf {molecule.multiplicity - 1} -T {nprocs} {nci}"
    else:
        command = f"crest xtb-in.xyz --gfn{gfn} --chrg {molecule.charge} --uhf {molecule.multiplicity - 1} -T {nprocs} {nci}"

    #### NOTE(review): ``additional_flags`` is interpolated into a shell command line; only pass trusted input
    if additional_flags is not None:
        command = command + " " + additional_flags

    if logfile:
        with open(logfile, "w") as f:
            result = sp.run(command, stdout=f, stderr=f, cwd=directory, shell=True)
    else:
        result = sp.run(command, stdout=sp.PIPE, stderr=sp.PIPE, cwd=directory, shell=True)
    result.check_returncode()

    ensemble = cctk.XYZFile.read_ensemble(f"{directory}/crest_conformers.xyz").ensemble
    return ensemble
def __init__(self, job_types, ensemble=None, header=None, variables=None, blocks=None):
    """
    Build an ``OrcaFile`` from its job types and optional contents.

    Args:
        job_types (list): list of ``OrcaJobType`` instances (required)
        ensemble (ConformationalEnsemble): structures for this file; a new empty ensemble is created
            if this is missing, falsy, or of the wrong type
        header (str): keyword line or lines; stored as ``None`` if missing or not a string
        variables (dict): variables to specify; stored as ``{}`` if missing or not a dict
        blocks (dict): block name -> list of lines; stored as ``{}`` if missing or not a dict

    Raises:
        ValueError: if ``job_types`` is ``None``
        TypeError: if any element of ``job_types`` is not an ``OrcaJobType``
    """
    if job_types is None:
        raise ValueError("need job types for new Orca file")

    #### explicit loop so the offending entry is in scope for the message
    #### (a generator-expression variable is not visible outside the expression)
    for job in job_types:
        if not isinstance(job, OrcaJobType):
            raise TypeError(f"invalid job type {job}")
    self.job_types = job_types

    if ensemble and isinstance(ensemble, ConformationalEnsemble):
        self.ensemble = ensemble
    else:
        self.ensemble = ConformationalEnsemble()

    if header and isinstance(header, str):
        self.header = header
    else:
        self.header = None

    if blocks and isinstance(blocks, dict):
        for lines in list(blocks.values()):
            assert isinstance(lines, list)
        self.blocks = blocks
    else:
        self.blocks = {}

    if variables and isinstance(variables, dict):
        self.variables = variables
    else:
        self.variables = {}

    #### documented attributes that ``read_file`` fills in afterwards; defaults keep
    #### ``check_has_properties()`` usable on a file that was built by hand
    self.successful_terminations = 0
    self.elapsed_time = 0
read_file(cls, filename): + if re.search("inp$", filename): + return cls._read_inp_file(filename) + + multiple_lines = parse.split_multiple_inputs(filename) + files = [] + + for lines in multiple_lines: + input_lines = parse.extract_input_file(lines) + header = parse.read_header(input_lines) + job_types = cls._assign_job_types(header) + variables, blocks = parse.read_blocks_and_variables(input_lines) + + success = 0 + elapsed_time = 0 + for line in lines: + if line.strip().startswith("****ORCA TERMINATED NORMALLY****"): + success += 1 + elif line.startswith("TOTAL RUN TIME"): + fields = line.split() + assert len(fields) == 13, f"unexpected number of fields on elapsed time line:\n{line}" + days = float(fields[3]) + hours = float(fields[5]) + minutes = float(fields[7]) + seconds = float(fields[9]) + elapsed_time = days * 86400 + hours * 3600 + minutes * 60 + seconds + + energies, iters = parse.read_energies(lines) + if len(energies) == 0: + return None + + atomic_numbers, geometries = parse.read_geometries(lines, num_to_find=len(energies)) + assert len(geometries) >= len(energies), "can't have an energy without a geometry (cf. 
pigeonhole principle)" + + charge = lines.find_parameter("xyz", 6, 4)[0] + multip = lines.find_parameter("xyz", 6, 5)[0] + + #### TODO + # detect Mayer bond orders + + f = OrcaFile(job_types, header=header, variables=variables, blocks=blocks) + f.elapsed_time = elapsed_time + f.successful_terminations = success + + molecules = [None] * len(geometries) + properties = [{} for _ in range(len(geometries))] + for idx, geom in enumerate(geometries): + molecules[idx] = Molecule(atomic_numbers, geom, charge=charge, multiplicity=multip, bonds=None) + if idx < len(energies): + properties[idx]["energy"] = energies[idx] + properties[idx]["filename"] = filename + properties[idx]["iteration"] = idx + properties[idx]["scf_iterations"] = iters[idx] + + if multip > 1: + s2 = lines.find_parameter("Expectation value of", 6, 5) + for idx, spin_contam in enumerate(s2): + properties[idx]["S**2"] = spin_contam + + if OrcaJobType.OPT in job_types: + rms_grad, max_grad, rms_step, max_step = parse.read_gradients(lines, len(properties)) + for idx in range(len(rms_grad)): + if idx < len(rms_grad): + properties[idx]["rms_gradient"] = rms_grad[idx] + + if idx < len(max_grad): + properties[idx]["max_gradient"] = max_grad[idx] + + if idx < len(rms_step): + properties[idx]["rms_step"] = rms_step[idx] + + if idx < len(max_step): + properties[idx]["max_step"] = max_step[idx] + + if OrcaJobType.FREQ in job_types: + properties[-1]["frequencies"] = sorted(parse.read_freqs(lines)) + + enthalpies = lines.find_parameter("Total Enthalpy", expected_length=5, which_field=3) + if len(enthalpies) == 1: + properties[-1]["enthalpy"] = enthalpies[0] + elif len(enthalpies) > 1: + raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}") + + gibbs = lines.find_parameter("Final Gibbs free enthalpy", expected_length=7, which_field=5) + if len(gibbs) == 1: + properties[-1]["gibbs_free_energy"] = gibbs[0] + elif len(gibbs) > 1: + raise ValueError(f"unexpected # of gibbs free energies 
def write_file(self, filename, molecule=None, header=None, variables=None, blocks=None):
    """
    Write a ``.inp`` file, falling back to this object's own attributes for anything not supplied.

    Args:
        filename (str): path to the new file
        molecule (int or Molecule): a ``Molecule`` to write, or an index into ``self.ensemble.molecules``.
            Default is -1 (the last molecule); non-negative integers are 0-indexed.
        header (str): header for new file; defaults to ``self.header``
        variables (dict): variables for new file; defaults to ``self.variables``
        blocks (dict): blocks for new file; defaults to ``self.blocks``
    """
    chosen = -1 if molecule is None else molecule
    if not isinstance(chosen, Molecule):
        chosen = self.ensemble.molecules[chosen]

    self.write_molecule_to_file(
        filename,
        chosen,
        self.header if header is None else header,
        self.variables if variables is None else variables,
        self.blocks if blocks is None else blocks,
    )
def get_molecule(self, num=None):
    """
    Return one molecule from ``self.ensemble``, by default the last one.

    ``num`` is used directly as a Python list index into ``self.ensemble.molecule_list()``:
    non-negative values count from 0 and negative values count from the end.

    NOTE(review): the previous docstring described positive numbers as 1-indexed, but no such
    shift is applied here -- confirm which behaviour callers expect.

    Args:
        num (int): index of the molecule; ``None`` means -1 (the last molecule)

    Raises:
        TypeError: if ``num`` is neither ``None`` nor an int
    """
    # some callers pass num=None explicitly, so the default is resolved here
    index = -1 if num is None else num

    if isinstance(index, int):
        return self.ensemble.molecule_list()[index]

    raise TypeError("num must be int")
+ """ + if (OrcaJobType.FREQ in self.job_types) and (self.ensemble[-1:,"frequencies"] is not None): + freqs = self.ensemble[-1:,"frequencies"] + if not isinstance(freqs, list) or len(freqs) == 0: + return list() + else: + return list(map(int, np.array(freqs)[np.array(freqs) < 0])) + else: + return list() + + + @classmethod + def _assign_job_types(cls, header): + """ + Assigns ``OrcaJobType`` objects from route card. ``OrcaJobType.SP`` is assigned by default. + + Args: + header (str): Orca header + + Returns: + list of ``OrcaJobType`` objects + """ + job_types = [] + for name, member in OrcaJobType.__members__.items(): + if re.search(f" {member.value}", str(header), re.IGNORECASE): + job_types.append(member) + if OrcaJobType.SP not in job_types: + job_types.append(OrcaJobType.SP) + return job_types + + def check_has_properties(self): + """ + Checks that the file has all the appropriate properties for its job types, and raises ``ValueError`` if not. + + This only checks the last molecule in ``self.ensemble``, for now. + """ + if self.successful_terminations > 0: + for job_type in self.job_types: + for prop in EXPECTED_PROPERTIES[job_type.value]: + if not self.ensemble.has_property(-1, prop): + raise ValueError(f"expected property {prop} for job type {job_type}, but it's not there!") + else: + return + + diff --git a/build/lib/cctk/parse_gaussian.py b/build/lib/cctk/parse_gaussian.py new file mode 100644 index 0000000..3cd9eb8 --- /dev/null +++ b/build/lib/cctk/parse_gaussian.py @@ -0,0 +1,768 @@ +import numpy as np +import re +import ahocorasick + +import cctk +from cctk.helper_functions import get_corrected_free_energy + +""" +Functions to help with parsing Gaussian files +""" + +def read_file_fast(file_text, filename, link1idx, max_len=20000, extended_opt_info=False, fail_silently=True): + + #### "Make your bottleneck routines fast, everything else clear" - M. Scott Shell, UCSB + #### Welcome to the fast part! 
+ + #### Here we identify all the lines we're going to scrape + words = [ + "SCF Done", + "Entering Link 1", + "Normal termination", + "Elapsed time", + "Multiplicity", + "RMS Force", #5 + "RMS Displacement", + "Maximum Force", + "Maximum Displacement", + "Cartesian Forces", + "Internal Forces", #10 + "Predicted change in Energy", + "thermal Enthalpies", + "thermal Free Energies", + "Frequencies", + "Temperature", #15 + "Isotropic", + "EUMP2", + "EUMP3", + "UMP4(SDTQ)", + "Wavefunction amplitudes converged", #20 + ] + + #### And here are the blocks of text + #### format: [start, stop, num] + + blocks = [ + ["#p", "----", 1], + ["/99;", "Symbolic Z-matrix", 1], + ["The following ModRedundant input section", "\n \n", 1], + [ + ["Input orientation", "Standard orientation", "Cartesian Coordinates"], + "Leave Link 202", + 1000, + ], + ["Wallingford", "#p", 1], + ["Initial Parameters", "! A", 1], #5 + ["Total nuclear spin-spin coupling J", "Leave Link", 1], + ["Forces (Hartrees/Bohr)", "Cartesian Forces", 1], + ["Hirshfeld charges, spin densities, dipoles, and CM5 charges", " Hirshfeld charges", 1], + ["Mulliken charges", "Sum of Mulliken charges", 1], + ["Electronic spatial extent", "Quadrupole moment", 1], #10 + ["normal coordinates", "Thermochemistry", 1], + ["Isotropic", "Eigenvalues", 1000], + ] + + word_matches = [[] for _ in words] + block_matches = [[] for _ in blocks] + + A = ahocorasick.Automaton() + + for idx, word in enumerate(words): + A.add_word(word, idx) + + for idx, b in enumerate(blocks): + if isinstance(b[0], list): + for start in b[0]: + A.add_word(start, ("start", idx)) + else: + A.add_word(b[0], ("start", idx)) + + #### perform search + A.make_automaton() + found_words = A.iter(file_text) + + #### now, we have to expand our one-character matches to whole lines/blocks + #### this is the slowest part + for position, idx in found_words: + if isinstance(idx, int): + stepsize = 10 + + match = file_text[position] + i = position + 1 + while 
match[-1-stepsize:].find("\n") < 0: + match = match + file_text[i:i+stepsize] + i += stepsize + + match = match.split("\n")[0] + + j = position + while match[:stepsize].find("\n") < 0: + match = file_text[j-stepsize:j] + match + j += -1 * stepsize + + match = match.split("\n")[-1] + word_matches[idx].append(match) + + elif isinstance(idx, tuple): + idx = idx[1] + if len(block_matches[idx]) >= blocks[idx][2]: + continue + + match = "" + i = position - len(blocks[idx][0]) + 1 + end = blocks[idx][1] + + stepsize = 1000 + file_len = len(file_text) + + #### we're looking for the end, but we take steps with length ``stepsize`` to go faster + while match[-1 * (stepsize + len(end)):-1].count(end) == 0 and match.count("\n") < max_len: + match = match + file_text[i:i+stepsize] + i += stepsize + + if i > file_len: + break + + match = match.split(end)[0] + + # special geometry handling :/ + if idx == 3: + # ccw 10.8.2021 - changed "==" to "<=" to prevent issues where # geoms would get stuck. + # can't remember quite why this was needed. hopefully it is ok this way. tests pass. + if len(block_matches[3]) <= len(word_matches[0]): + block_matches[3].append(match) + else: + block_matches[3][-1] = match + + else: + block_matches[idx].append(match) + + del file_text # here, have your RAM back! + + if len(block_matches[1]) == 0: + raise ValueError(f"Can't find a title block - something is wrong with {filename}! (cctk requires Gaussian output files to have been run in ``#p`` verbose mode)") + + #### and from here, we're off to the races! 
+ n, g = parse_geometry(block_matches[3]) + title, link0, route_card, footer, job_types = parse_header_footer(block_matches[0], block_matches[1], block_matches[2], block_matches[4]) + energies, scf_iterations = parse_energies(word_matches[0]) + success, elapsed_time = parse_success_elapsed_time(word_matches[2], word_matches[3]) + charge, multip = parse_charge_multiplicity(word_matches[4]) + bonds = parse_bonds(block_matches[5]) + + # post-HF methods give weird energies + if re.search("mp2", route_card, re.IGNORECASE): + energies = parse_mp2_energies(word_matches[17]) + elif re.search("mp3", route_card, re.IGNORECASE): + energies = parse_mp3_energies(word_matches[18]) + elif re.search("mp4", route_card, re.IGNORECASE): + energies = parse_mp4_energies(word_matches[19]) + elif re.search("ccsd", route_card, re.IGNORECASE): + energies = parse_cc_energies(word_matches[20]) + elif re.search("cisd", route_card, re.IGNORECASE): + energies = parse_ci_energies(word_matches[20]) + + f = cctk.GaussianFile(job_types=job_types, route_card=route_card, link0=link0, footer=footer, success=success, elapsed_time=elapsed_time, title=title) + + molecules = [None] * len(g) + properties = [{} for _ in range(len(g))] + for idx, geom in enumerate(g): + molecules[idx] = cctk.Molecule(n[0], geom, charge=charge, multiplicity=multip, bonds=bonds, checks=False) + if idx < len(energies): + properties[idx]["energy"] = energies[idx] + if idx < len(scf_iterations): + properties[idx]["scf_iterations"] = scf_iterations[idx] + properties[idx]["link1_idx"] = link1idx + properties[idx]["filename"] = filename + properties[idx]["iteration"] = idx + + if cctk.GaussianJobType.OPT in job_types: + rms_forces = extract_parameter(word_matches[5], 2) + rms_disp = extract_parameter(word_matches[6], 2) + + if extended_opt_info: + max_forces = extract_parameter(word_matches[7], 2) + max_disp = extract_parameter(word_matches[8], 2) + rms_grad = extract_parameter(word_matches[9], 5) + max_grad = 
extract_parameter(word_matches[9], 3) + rms_int = extract_parameter(word_matches[10], 5) + max_int = extract_parameter(word_matches[10], 3) + delta_e = extract_parameter(word_matches[11], 3, cast_to_float=False) + + # ccw 10.8.2021 - ad hoc correction to Gaussian. unsure what's going on here. sometimes len(rms_forces) > len(g) + force_property_index = min(len(g), len(rms_forces)) + + for idx in range(force_property_index): + properties[idx]["rms_force"] = rms_forces[idx] + properties[idx]["rms_displacement"] = rms_disp[idx] + + if extended_opt_info: + if idx < len(max_forces): + properties[idx]["max_force"] = max_forces[idx] + + if idx < len(max_disp): + properties[idx]["max_displacement"] = max_disp[idx] + + if idx < len(max_grad): + properties[idx]["max_gradient"] = max_grad[idx] + + if idx < len(rms_grad): + properties[idx]["rms_gradient"] = rms_grad[idx] + + if idx < len(max_int): + properties[idx]["max_internal_force"] = max_int[idx] + + if idx < len(rms_int): + properties[idx]["rms_internal_force"] = rms_int[idx] + + if idx < len(delta_e): + change_in_energy = re.sub(r"Energy=", "", delta_e[idx]) + properties[idx]["predicted_change_in_energy"] = float(change_in_energy.replace('D', 'E')) + + if cctk.GaussianJobType.FREQ in job_types and len(molecules): + enthalpies = extract_parameter(word_matches[12], 6) + if len(enthalpies) == 1: + properties[-1]["enthalpy"] = enthalpies[0] + elif len(enthalpies) > 1: + raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}") + + gibbs_vals = extract_parameter(word_matches[13], 7) + if len(gibbs_vals) == 1: + properties[-1]["gibbs_free_energy"] = gibbs_vals[0] + elif len(gibbs_vals) > 1: + raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}") + + vibrational_modes = parse_modes(block_matches[11], num_atoms=molecules[-1].num_atoms(), hpmodes=re.search("hpmodes", route_card)) + molecules[-1].vibrational_modes = vibrational_modes + + frequencies = [] + try: + 
frequencies += extract_parameter(word_matches[14], 2) + + # very small molecules might only have 1 or 2 freqs + try: + frequencies += extract_parameter(word_matches[14], 3) + except Exception as e: + pass + try: + frequencies += extract_parameter(word_matches[14], 4) + except Exception as e: + pass + + properties[-1]["frequencies"] = sorted(frequencies) + except Exception as e: + raise ValueError("error finding frequencies") + + temperature = extract_parameter(word_matches[15], 1) + if len(temperature) == 1: + properties[-1]["temperature"] = temperature[0] + corrected_free_energy = get_corrected_free_energy(gibbs_vals[0], frequencies, frequency_cutoff=100.0, temperature=temperature[0]) + properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy) + + if cctk.GaussianJobType.NMR in job_types: + nmr_shifts, shielding_tensors = read_nmr_shifts(block_matches[12], molecules[0].num_atoms()) + if nmr_shifts is not None: + properties[-1]["isotropic_shielding"] = nmr_shifts.view(cctk.OneIndexedArray) + properties[-1]["shielding_tensors"] = shielding_tensors + + if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search("nmr=spinspin", f.route_card,flags=re.IGNORECASE): + couplings = read_j_couplings(block_matches[6], molecules[0].num_atoms()) + if couplings is not None: + properties[-1]["j_couplings"] = couplings + + if cctk.GaussianJobType.FORCE in job_types and len(molecules): + assert len(molecules) == 1, "force jobs should not be combined with optimizations!" 
+ force_block = block_matches[7] + if len(force_block) == 0: + raise ValueError("no forces to parse!") + forces = parse_forces(force_block) + properties[0]["forces"] = forces + + if cctk.GaussianJobType.POP in job_types and len(molecules): + if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card) and len(block_matches[8]) > 0: + charges, spins = parse_hirshfeld(block_matches[8]) + properties[-1]["hirshfeld_charges"] = charges + properties[-1]["hirshfeld_spins"] = spins + + if len(molecules): + try: + charges, dipole, dipole_v = parse_charges_dipole(block_matches[9], block_matches[10]) + properties[-1]["mulliken_charges"] = charges + properties[-1]["dipole_moment"] = dipole + properties[-1]["dipole_vector"] = dipole_v + except Exception as e: + pass + + for mol, prop in zip(molecules, properties): + f.ensemble.add_molecule(mol, properties=prop) + + if fail_silently: + try: + f.check_has_properties() + except Exception as e: + # silently exclude this file + return None + else: + f.check_has_properties() + + return f + + +def parse_geometry(blocks): + nums = [] + geoms = [] + for block in blocks: + current_nums = [] + current_geoms = [] + for line in block.split("\n")[4:-2]: + if re.search("Distance", line) or re.search("Rotational constants", line): + break + + # on some jobs, the normal ending flags get cut off? but this should fix it. 
+ # ccw 6.10.22 + if re.search("One-electron integrals computed using", line): + break + + pieces = list(filter(None, line.split(" "))) + if len(pieces) != 6: + continue + try: + current_nums.append(int(pieces[1])) + current_geoms.append([float(pieces[3]), float(pieces[4]), float(pieces[5])]) + except: + print(block) + print("\n\n") + print(line) + nums.append(current_nums) + geoms.append(current_geoms) + return nums, geoms + +def parse_header_footer(route_block, title_block, footer_block, link0_block): + link0 = dict() + route_card = "" + footer = None + title = "" + job_types = [] + + # 2 lines before 'Symbolic Z Matrix' + title = title_block[0].split("\n")[-3].strip() + + for line in route_block[0].split("\n"): + route_card += line.lstrip() + + if len(footer_block) > 0: + footer = "\n".join(list(footer_block[0].split("\n"))[1:]) # get rid of the first line + footer = "\n".join([" ".join(list(filter(None, line.split(" ")))) for line in footer.split("\n")]) + + for line in link0_block[0].split("\n"): + if re.match(" \%", line): + pieces = line[2:].split("=") + link0[pieces[0]] = pieces[1] + + for name, member in cctk.GaussianJobType.__members__.items(): + if re.search(f" {member.value}", str(route_card), re.IGNORECASE): + job_types.append(member) + if cctk.GaussianJobType.SP not in job_types: + job_types.append(cctk.GaussianJobType.SP) + + return title, link0, route_card, footer, job_types + +def parse_energies(scf_done_block): + energies = [] + iters = [] + + for line in scf_done_block: + pieces = list(filter(None, line.split(" "))) + energies.append(float(pieces[4])) + iters.append(int(pieces[7])) + + return energies, iters + +def parse_success_elapsed_time(success_lines, time_lines): + successes = len(success_lines) + elapsed_time = 0 + for line in time_lines: + fields = list(filter(None, line.split(" "))) + elapsed_time += int(fields[2]) * 86400 + int(fields[4]) * 3600 + int(fields[6]) * 60 + float(fields[8]) + return successes, elapsed_time + +def 
parse_charge_multiplicity(charge_line): + fields = list(filter(None, charge_line[0].replace("=", " ").split(" "))) + return int(fields[1]), int(fields[3]) + +def parse_bonds(bonding_block): + if len(bonding_block) == 0: + return None + + bond_array = [] + for line in bonding_block[0].split("\n"): + if re.search(r"! R", line): + pieces = list(filter(None, line.split(" "))) + atoms = pieces[2].replace("R", "").replace("(", "").replace(")", "").split(",") + try: + bond_array.append([int(atoms[0]), int(atoms[1])]) + except Exception as e: + raise ValueError(f"error parsing line - can't extract atoms!\n{line}\e{e}") + return bond_array + +def split_link1_to_text(filename): + link1_blocks = [] + with open(filename, "r") as lines: + current_text = "" + for idx, line in enumerate(lines): + current_text = current_text + line + if re.search("Entering Link 1", line): + link1_blocks.append(current_text) + current_text = "" + link1_blocks.append(current_text) + return link1_blocks[1:] #### the first block is just a few lines + +def extract_parameter(lines, position, cast_to_float=True): + vals = [] + for line in lines: + pieces = list(filter(None, line.split(" "))) + if cast_to_float: + try: + vals.append(float(pieces[position])) + except Exception as e: + #### sometimes RMS Force comes thru as "******" for some reason + vals.append(0) + else: + vals.append(pieces[position]) + return vals + +def parse_forces(force_block): + forces = [] + try: + split_block = force_block[0].split("\n")[2:] + except Exception as e: +# print(e) +# print("------force block-------") +# print(force_block) + raise e + for line in split_block: + fields = re.split(" +", line) + fields = list(filter(None, fields)) + + if len(fields) == 5: + forces.append([float(fields[2]), float(fields[3]), float(fields[4])]) + + return cctk.OneIndexedArray(forces) + +def parse_charges_dipole(mulliken_block, dipole_block): + charges = [] + dipole = 0 + dipole_v = np.zeros(shape=3) + + for line in 
mulliken_block[0].split("\n")[2:]: + fields = re.split(" +", line) + fields = list(filter(None, fields)) + + if len(fields) == 3: + charges.append(float(fields[2])) + + for line in dipole_block[0].split("\n")[1:]: + fields = re.split(" +", line) + fields = list(filter(None, fields)) + + if len(fields) == 8: + dipole_v[0] = float(fields[1]) + dipole_v[1] = float(fields[3]) + dipole_v[2] = float(fields[5]) + dipole = float(fields[7]) + break + + return cctk.OneIndexedArray(charges), dipole, dipole_v + +def parse_hirshfeld(hirshfeld_block): + charges = [] + spins = [] + + if len(hirshfeld_block) == 0: + return None, None + + for line in hirshfeld_block[0].split("\n")[2:]: + fields = re.split(" +", line) + fields = list(filter(None, fields)) + + if len(fields) == 8: + charges.append(float(fields[2])) + spins.append(float(fields[3])) + + return cctk.OneIndexedArray(charges), cctk.OneIndexedArray(spins) + +def parse_modes(freq_block, num_atoms, hpmodes=False): + freqs = list() + masses = list() + force_ks = list() + intensities = list() + displacements = list() + + if len(freq_block) == 0: + return list() + + chunks = freq_block[0].split("Freq") + + if hpmodes: + chunks = chunks[1:] + + for chunk in chunks: + lines = chunk.split("\n") + + if hpmodes: + num_cols = len(re.split(" +", lines[0])) - 2 + current_displacements = [np.zeros(shape=(num_atoms, 3)) for x in range(num_cols)] + + if len(freqs): + new_freqs = list(filter(None, re.split(" +", lines[0])))[2:] + + if float(new_freqs[-1]) <= float(freqs[-1]): + break # want to skip the non-hpmodes section, so no looping allowed + else: + freqs += new_freqs + else: + freqs += list(filter(None, re.split(" +", lines[0])))[2:] + + masses += list(filter(None, re.split(" +", lines[1])))[3:] + force_ks += list(filter(None, re.split(" +", lines[2])))[3:] + intensities += list(filter(None, re.split(" +", lines[3])))[3:] + + for line in lines[6:]: + fields = re.split(" +", line) + fields = list(filter(None, fields)) + + if 
len(fields) < (num_cols + 3): + continue + + if fields[0] == "Harmonic": + break + + for col_idx, val in enumerate(fields[3:]): + current_displacements[col_idx][int(fields[1])-1][int(fields[0])-1] = val + + for d in current_displacements: + displacements.append(d.view(cctk.OneIndexedArray)) + + else: + current_displacements = [list() for _ in re.split(" +", lines[0])[2:]] + + freqs += re.split(" +", lines[0])[2:] + masses += re.split(" +", lines[1])[4:] + force_ks += re.split(" +", lines[2])[4:] + intensities += re.split(" +", lines[3].rstrip())[4:] + + for line in lines[5:]: + fields = re.split(" +", line) + fields = list(filter(None, fields)) + + if len(fields) < 4: + break + + current_displacements[0].append([float(x) for x in fields[2:5]]) + + if len(current_displacements) > 1: + current_displacements[1].append([float(x) for x in fields[5:8]]) + + if len(current_displacements) > 2: + current_displacements[2].append([float(x) for x in fields[8:11]]) + + for d in current_displacements: + displacements.append(cctk.OneIndexedArray(d)) + + freqs = [float(x) for x in freqs] + masses = [float(x) for x in masses] + force_ks = [float(x) for x in force_ks] + intensities = [float(x) for x in intensities] + + assert len(freqs) == len(masses) + assert len(freqs) == len(force_ks) + assert len(freqs) == len(displacements) + + modes = list() + for f, m, k, i, d in zip(freqs, masses, force_ks, intensities, displacements): + k *= 143.9326 # mdyne Å**-1 to kcal/mol Å**-2 + modes.append(cctk.VibrationalMode(frequency=f, reduced_mass=m, force_constant=k, intensity=i, displacements=d)) + + return modes + +def read_j_couplings(lines, n_atoms): + """ + Helper method to search through output file and read J couplings + Args: + lines (list): list of lines in file + n_atoms (int): how many atoms are in the molecule + Returns: + ``couplings`` symmetric 2D np.array of couplings (in Hz) with zero-indexed atoms on both axes + or None if no couplings were found + """ + couplings = 
np.zeros((n_atoms,n_atoms)) + n_full_blocks, lines_in_partial_block = divmod(n_atoms,5) + n_lines = 5 * (n_full_blocks * (n_full_blocks+1) / 2) + n_full_blocks + 1 + if lines_in_partial_block > 0: + n_lines += 1 + lines_in_partial_block + n_lines = int(n_lines) + + lines = lines[0].split("\n") + + i = 0 + read_column_indices = False + read_row = False + this_column_indices = [] + while i < n_lines: + # get current line + line = lines[i] + + # if this is the header, we should be reading the column indices next + if "Total nuclear spin-spin coupling J (Hz):" in line: + i += 1 + read_column_indices = True + continue + + # this is not the header, so split the fields + fields = line.split() + + # read the column indices + if read_column_indices: +# this_n_columns = len(fields) + this_column_indices = [ int(j)-1 for j in fields ] + i += 1 + read_column_indices = False + read_row = True + continue + elif read_row: + row = int(fields[0])-1 + for j,value in enumerate(fields[1:]): + column = this_column_indices[j] + value = value.replace("D","E") + value = float(value) + couplings[row,column] = value + couplings[column,row] = value + + # check if we have read the entire matrix + if row == n_atoms - 1 and column == n_atoms - 1: + break + + # check if this is the end of the current block + if row == n_atoms - 1: + read_column_indices = True + read_row = False + i += 1 + continue + + read_row = True + i += 1 + continue + else: + raise ValueError("impossible") + + return couplings + +def parse_mp2_energies(lines): + energies = [] + for line in lines: + pieces = list(filter(None, line.split(" "))) + energy_str = pieces[5] + energy_str = re.sub("D", "E", energy_str) + energies.append(float(energy_str)) + return energies + +def parse_mp3_energies(lines): + energies = [] + for line in lines: + pieces = list(filter(None, line.split(" "))) + energy_str = pieces[3] + energy_str = re.sub("D", "E", energy_str) + energies.append(float(energy_str)) + return energies + +def 
parse_mp4_energies(lines): + energies = [] + for line in lines: + pieces = list(filter(None, line.split(" "))) + energy_str = pieces[3] + energy_str = re.sub("D", "E", energy_str) + energies.append(float(energy_str)) + return energies + +def parse_cc_energies(lines): + energies = [] + for line in lines: + pieces = list(filter(None, line.split(" "))) + energy_str = pieces[4] + energy_str = re.sub("D", "E", energy_str) + energies.append(float(energy_str)) + return energies + +def parse_ci_energies(lines): + return parse_cc_energies(lines) + +def read_nmr_shifts(blocks, num_atoms): + """ + Helper method to search through output file and read NMR shifts. + Args: + lines (list): list of lines in file + num_atoms (int): number of atoms expected + Returns: + list of isotropic NMR shifts (np.ndarray) + list of shielding tensors (list of 3x3 np.ndarray) + """ + # assumes that lines only come from one Link1 section + shieldings = [] + tensors = [] + for block in blocks: + lines = block.split("\n") + tensor = np.zeros(shape=(3,3)) + for line in lines: + fields = line.split() + # there are 8 on each line but we truncate the first 2 in the block selection process + if len(fields) == 6 and fields[0] == "Isotropic" and fields[3] == "Anisotropy": + fields = line.split() + assert len(fields) == 6, f"Expected 6 fields on an NMR shielding output line but found {len(fields)} instead!" + try: + shielding = float(fields[2]) + except Exception as e: + raise ValueError(f"Error parsing NMR shielding output line:\n{line}") + shieldings.append(shielding) + + # yes, this is very elegant. 
+ tensor[0][0] = float(re.search("XX=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[0][1] = float(re.search("XY=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[0][2] = float(re.search("XZ=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[1][0] = float(re.search("YX=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[1][1] = float(re.search("YY=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[1][2] = float(re.search("YZ=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[2][0] = float(re.search("ZX=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[2][1] = float(re.search("ZY=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[2][2] = float(re.search("ZZ=\s+(?P-?\d+\.\d+)", block).group("val")) + tensors.append(tensor) + + if len(shieldings) != 0: + assert len(shieldings) == num_atoms, f"Expected {num_atoms} shieldings but found {len(shieldings)}!" + for shielding, tensor in zip(shieldings, tensors): + assert 0.01 > abs(np.trace(tensor)/3 - shielding) + return np.asarray(shieldings), tensors + else: + #### we can catch this problem later if the file is finished + return None, None + +def split_link1(filename): + """ + Splits ``filename`` into blocks by searching for "Entering Link 1". 
+ Args: + filename (str): path to file + Returns: + list of list of lines by Link1 section; so a file with one Link1 specification would return [lines1, lines2] + """ + link1_blocks = [] + + start_block = 0 + with open(filename, "r") as lines: + for idx, line in enumerate(lines): + if re.search("Entering Link 1", line): + link1_blocks.append(cctk.LazyLineObject(file=filename, start=start_block, end=idx)) + start_block = idx + link1_blocks.append(cctk.LazyLineObject(file=filename, start=start_block, end=idx)) + + return link1_blocks[1:] #### the first block is just a few lines + + diff --git a/build/lib/cctk/parse_orca.py b/build/lib/cctk/parse_orca.py new file mode 100644 index 0000000..3670df6 --- /dev/null +++ b/build/lib/cctk/parse_orca.py @@ -0,0 +1,220 @@ +import numpy as np +import re + +from cctk.helper_functions import get_number +from cctk import OneIndexedArray, LazyLineObject + +""" +Functions to help with parsing Orca files +""" +def read_geometries(lines, num_to_find): + atomic_numbers = [] + geometries = [] + + geom_blocks = lines.search_for_block("CARTESIAN COORDINATES \(ANGSTROEM\)", "CARTESIAN COORDINATES", join="\n", count=num_to_find, max_len=1000) + if num_to_find == 1: + geom_blocks = [geom_blocks] + + for block in geom_blocks: + rows = block.split("\n") + numbers = [] + geometry = [] + + for line in rows[2:]: + if len(line.strip()) == 0: + continue + + pieces = list(filter(None, line.split(" "))) + + if len(pieces) == 4: + if re.match("[0-9]", pieces[0]): + numbers.append(int(pieces[0])) + else: + numbers.append(int(get_number(pieces[0]))) + geometry.append([float(pieces[1]), float(pieces[2]), float(pieces[3])]) + + atomic_numbers.append(OneIndexedArray(numbers, dtype=np.int8)) + geometries.append(OneIndexedArray(geometry)) + + assert len(atomic_numbers) == len(geometries) + for zs in atomic_numbers: + assert np.array_equiv(zs, atomic_numbers[0]) + return atomic_numbers[0], geometries + +def read_energies(lines): + energies = 
lines.find_parameter("FINAL SINGLE POINT ENERGY", 5, 4) + iters = lines.find_parameter("SCF CONVERGED AFTER", 7, 4) + return energies, iters + +def split_multiple_inputs(filename): + """ + Splits ``filename`` into blocks by searching for _________. + + Args: + filename (str): path to file + + Returns: + list of list of ``LazyLineObject`` by input section + """ + output_blocks = [] + + start_block = 0 + with open(filename, "r") as lines: + for idx, line in enumerate(lines): + if re.search("Entering Link 1", line): # this will never be true for an Orca file -- this is just a stopgap + output_blocks.append(LazyLineObject(file=filename, start=start_block, end=idx)) + start_block = idx + output_blocks.append(LazyLineObject(file=filename, start=start_block, end=idx)) + + return output_blocks + +def read_mulliken_charges(lines): + """ + Reads charges. + + Args: + lines (list): list of lines in file + + Returns: + ``cctk.OneIndexedArray`` of charges + """ + charges = [] + charge_block = lines.search_for_block("MULLIKEN ATOMIC CHARGES", "Sum of atomic charges", join="\n") + for line in charge_block.split("\n")[2:]: + fields = re.split(" +", line) + fields = list(filter(None, fields)) + + if len(fields) == 4: + charges.append(float(fields[3])) + + return OneIndexedArray(charges) + + +def read_loewdin_charges(lines): + """ + Reads charges. 
+ + Args: + lines (list): list of lines in file + + Returns: + ``cctk.OneIndexedArray`` of charges + """ + charges = [] + charge_block = lines.search_for_block("LOEWDIN ATOMIC CHARGES", "^$", join="\n") + for line in charge_block.split("\n")[2:]: + fields = re.split(" +", line) + fields = list(filter(None, fields)) + + if len(fields) == 4: + charges.append(float(fields[3])) + + return OneIndexedArray(charges) + +def read_header(lines): + for line in lines: + if re.match("!", line): + return line + +def read_blocks_and_variables(lines): + blocks = {} + variables = {} + + current_key = None + current_val = [] + for line in lines: + if current_key is not None: + if re.match("end", line): + blocks[current_key] = current_val + current_key = None + current_val = [] + else: + current_val.append(line) + continue + if re.match("%", line): + fields = re.split(" +", line.lstrip("%")) + if len(fields) == 1: + current_key = fields[0] + else: + variables[fields[0]] = " ".join(fields[1:]) + + return variables, blocks + +def extract_input_file(lines): + input_block = lines.search_for_block("INPUT FILE", "\*\*\*\*END OF INPUT\*\*\*\*", join="\n") + input_lines = [] + for line in input_block.split("\n")[3:]: + [_, line] = line.split(">") + line = line.lstrip() + input_lines.append(line) + return input_lines + +def read_freqs(lines): + freq_block = lines.search_for_block("VIBRATIONAL FREQUENCIES", "NORMAL MODES", join="\n", max_len=1000) + if freq_block is None: + return [] + freqs = [] + for line in freq_block.split("\n"): + fields = re.split(" +", line.strip()) + if len(fields) == 3: + if fields[2] == "cm**-1" and float(fields[1]) > 0: + freqs.append(float(fields[1])) + return freqs + +def read_gradients(lines, num_to_find): + grad_blocks = lines.search_for_block("Geometry convergence", "Max\(Bonds", join="\n", count=num_to_find) + if grad_blocks is None: + return + + rms_grad = [] + max_grad = [] + rms_step = [] + max_step = [] + for grad_block in grad_blocks: + if grad_block is 
None: + continue + for line in grad_block.split("\n"): + fields = re.split(" +", line.strip()) + if len(fields) == 5: + if fields[0] == "RMS" and fields[1] == "gradient": + rms_grad.append(float(fields[2])) + if fields[0] == "MAX" and fields[1] == "gradient": + max_grad.append(float(fields[2])) + if fields[0] == "RMS" and fields[1] == "step": + rms_step.append(float(fields[2])) + if fields[0] == "MAX" and fields[1] == "step": + max_step.append(float(fields[2])) + + return rms_grad, max_grad, rms_step, max_step + +def read_nmr_shifts(lines, num_atoms): + """ + Helper method to search through output file and read NMR shifts. + + Args: + lines (list): list of lines in file + num_atoms (int): number of atoms expected + + Returns: + list of isotropic NMR shifts (np.ndarray) + """ + # assumes that lines only come from one Link1 section + shieldings = [] + block = lines.search_for_block("Nucleus Element", "^$", join="\n") + for line in block.split("\n")[2:]: + fields = line.split() + if len(fields) == 4: + try: + shielding = float(fields[2]) + except: + raise ValueError(f"Error parsing NMR shielding output line:\n{line}") + shieldings.append(shielding) + + if len(shieldings) != 0: + assert len(shieldings) == num_atoms, f"Expected {num_atoms} shieldings but found {len(shieldings)}!" + return np.asarray(shieldings).view(OneIndexedArray) + else: + #### we can catch this problem later if the file is finished + return None + + diff --git a/build/lib/cctk/pdb_file.py b/build/lib/cctk/pdb_file.py new file mode 100644 index 0000000..34b3848 --- /dev/null +++ b/build/lib/cctk/pdb_file.py @@ -0,0 +1,56 @@ +from cctk import File +from cctk.helper_functions import get_symbol + +class PDBFile(File): + """ + Generic class for all ``.pdb`` files. 
+ """ + + def __init__(self, molecule, title=None): + pass + + @classmethod + def read_file(cls, filename): + pass + + @classmethod + def write_molecule_to_file(cls, filename, molecule, num=1, append=False): + """ + Write a ``.pdb`` file, using object attributes. + + Args: + filename (str): path to the new file + molecule (Molecule): ``Molecule`` object + num (int): model number + append (Bool): whether to write to file normally or append + """ + text = f"MODEL {num}\n" + + for idx, Z in enumerate(molecule.atomic_numbers, start=1): + line = molecule.get_vector(idx) + symb = get_symbol(Z).upper() + text += f"HETATM {idx:>4} {symb:<2} * 0 {line[0]:7.3f} {line[1]:7.3f} {line[2]:7.3f} 1.00 0.00 {symb:>2}\n" + + text += f"ENDMDL\n" + + if append: + super().append_to_file(filename, text) + else: + super().write_file(filename, text) + + + @classmethod + def write_ensemble_to_trajectory(cls, filename, ensemble): + """ + Writes a ``ConformationalEnsemble`` to a trajectory file. + + Args: + filename (str): where to write the file + ensemble (Ensemble): ``Ensemble`` object to write + """ + for idx, molecule in enumerate(ensemble.molecules): + if idx == 0: + cls.write_molecule_to_file(filename, molecule, num=idx+1, append=False) + else: + cls.write_molecule_to_file(filename, molecule, num=idx+1, append=True) + diff --git a/build/lib/cctk/point_charge.py b/build/lib/cctk/point_charge.py new file mode 100644 index 0000000..e5efeae --- /dev/null +++ b/build/lib/cctk/point_charge.py @@ -0,0 +1,18 @@ +import numpy as np + +class PointCharge(): + """ + Represents a point charge. + + Attributes: + coordinates (np.ndarray): 3-element ndarray + charge (float): charge + """ + + def __init__(self, coordinates, charge): + assert isinstance(coordinates, (np.ndarray, list)), "coordinates must be list or ndarray!" + assert len(coordinates) == 3, "coordinates must have len 3!" 
+ self.coordinates = np.array(coordinates) + + assert isinstance(charge, (float, int)), "charge must be numeric" + self.charge = float(charge) diff --git a/build/lib/cctk/quasiclassical.py b/build/lib/cctk/quasiclassical.py new file mode 100644 index 0000000..1b09eb1 --- /dev/null +++ b/build/lib/cctk/quasiclassical.py @@ -0,0 +1,214 @@ +""" +Functions to assist in sampling thermally excited states through quasiclassical approximations. +""" + +import numpy as np +import math, copy, random + +import cctk + +""" +Constants: +""" + +AMU_A2_FS2_PER_KCAL_MOL = 0.0004184 +BOLTZMANN_CONSTANT = 0.001985875 # kcal/mol•Kn + +def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities=False, which="quasiclassical", mode_options=None): + """ + Perturbs molecule by treating each mode as a quantum harmonic oscillator and sampling from the distribution appropriate to the temperature. + + This is probably the only useful function in this file. + + Args: + molecule (cctk.Molecule): molecule with vibrational modes + temperature (float): temperature + return velocities (bool): whether or not to return velocities + which (str): ``classical`` or ``quasiclassical`` + mode_options (dict): + Options for how to initialize specific modes. 
+ key (int): 1-indexed number of vibrational mode (from smallest frequency to largest) + val (dict): + velocity (str): one of "positive", "negative", "random", "zero" + displacement (bool): whether or not to displace + + Returns: + new ``cctk.Molecule`` object + energy above ground state (kcal/mol) + velocities (cctk.OneIndexedArray) + """ + assert isinstance(molecule, cctk.Molecule), "need a valid molecule" + assert len(molecule.vibrational_modes) > 0, "molecule needs to have vibrational modes (try running a ``freq`` job)" + assert isinstance(temperature, (int, float)), "temperature must be numeric" + + mol = copy.deepcopy(molecule) + total_PE = 0 + total = 0 + + velocities = np.zeros_like(molecule.geometry.view(np.ndarray)).view(cctk.OneIndexedArray) + + if mode_options is None: + mode_options = dict() + + all_text = "" + for idx, mode in enumerate(mol.vibrational_modes): + # enumerate is 0-indexed but GaussView, etc 1-index the modes. so we're 1-indexing here too. + if idx+1 in mode_options: + PE, KE, TE, mode_velocity, text = apply_vibration(mol, mode, temperature=temperature, which=which, **mode_options[idx+1]) + else: + PE, KE, TE, mode_velocity, text = apply_vibration(mol, mode, temperature=temperature, which=which) + total_PE += PE + total += TE + all_text += f"Mode {idx+1}: {text}\n" + + for idx in range(1,molecule.num_atoms()+1): + velocities[idx] += mode_velocity * mode.displacements[idx] + + if return_velocities: + return mol, total_PE, total, all_text, velocities + else: # backwards compatibility + return mol, total_PE, total, all_text + +def apply_vibration(molecule, mode, min_freq=50, temperature=298, verbose=False, which="quasiclassical", displacement=True, velocity="random", **kwargs): + """ + Apply a vibration to molecule ``molecule`` (modified in-place). 
+ + Args: + molecule (cctk.Molecule) + mode (cctk.VibrationalMode) + min_freq (float) + temperature (float) + verbose (bool) + which (str): ``quasiclassical`` or ``classical`` + displacement (bool): whether or not to displace the mode + velocity (str): ``positive``, ``negative``, ``random``, or ``zero`` + + Returns: + potential energy + kinetic energy + energy + velocities + text + """ + + if mode.frequency < 0: + which = "ts" + + if which == "quasiclassical": + level = mode.choose_level(temperature) + energy = mode.energy(level) + shift = mode.random_displacement(level=level, method=which) + method = f"qc level {level}" + elif which == "classical": + energy = random_boltzmann_energy(temperature) + shift = mode.random_displacement(energy=energy, method=which) + method = "classical" + elif which == "ts": + energy = random_boltzmann_energy(temperature) + shift = 0 + method = "ts" + else: + raise ValueError(f"``which`` must be ``classical``, ``quasiclassical``, or ``ts`` - {which} does not match!") + + # the rest is common to all methods + + # transition states and low-frequency modes do not get a starting displacement + if not displacement or mode.frequency < min_freq: + shift = 0 + + max_shift = mode.classical_turning_point(energy=energy) + if max_shift == 0.0: + rel_shift = 0.0 + print("Warning: attempted to calculate relative shift when max shift is 0!") + else: + if shift > max_shift: + print("Warning: requested shift of {shift:.4E} exceeds the max_shift of {max_shift:.4E}!") + shift = max_shift + rel_shift = shift/max_shift + + # apply displacements and compute energy breakdown + molecule.geometry += mode.displacements * rel_shift * max_shift + potential_energy = 0.5 * mode.force_constant * shift ** 2 + kinetic_energy = energy - potential_energy + + # mode velocity = sqrt(2 * KE / reduced mass) - want value in Å/fs + # https://stackoverflow.com/questions/46820182/randomly-generate-1-or-1-positive-or-negative-integer + mode_velocity = 
math.sqrt(2*kinetic_energy*AMU_A2_FS2_PER_KCAL_MOL/mode.reduced_mass) + + # choose velocity sign + if velocity == "random": + mode_velocity *= (1 if random.random() < 0.5 else -1) + elif velocity == "negative": + mode_velocity *= -1 + elif velocity == "zero": + mode_velocity = 0 + elif velocity != "positive": + raise ValueError(f"unknown value {velocity} for keywork ``velocity`` - must be ``positive``, ``negative``, ``random``, or ``zero``") + + text = f"{mode.frequency:.1f} cm-1 ({energy:4.2f} kcal/mol)\t{method}\t Shift {shift:5.2f} of {max_shift:4.2f} Å ({rel_shift:5.0%})" + text += f"\tPE = {potential_energy:4.2f} kcal/mol\tKE = {kinetic_energy:4.2f} kcal/mol\tk = {mode.force_constant:.1f} kcal/mol Å^-2" + if not displacement: + text += "\n\t\tDisplacement manually set to zero!\n" + if velocity == "zero": + text += "\n\t\tVelocity manually set to zero!\n" + if verbose: + print(text) + + return potential_energy, kinetic_energy, energy, mode_velocity, text + +def get_hermite_polynomial(n): + """ + Returns a ``np.poly1d`` object representing the degree-n Hermite polynomial. + + Adapted from https://scipython.com/blog/the-harmonic-oscillator-wavefunctions/. + """ + assert isinstance(n, int) and n >= 0, "need positive integer" + + Hr = [None] * (n + 1) + Hr[0] = np.poly1d([1.,]) + + if n > 0: + Hr[1] = np.poly1d([2., 0.]) + + if n > 1: + for v in range(2, n+1): + Hr[v] = Hr[1]*Hr[v-1] - 2*(v-1)*Hr[v-2] + return Hr[n] + +def random_boltzmann_energy(temperature, cutoff=10, step1=0.01, step2=0.0001): + """ + Randomly samples from the Boltzmann distribution appropriate for the given temperature. 
+ + Arguments: + temperature (int or float): in K + cutoff (int or float): max energy considered, in kT + step1: coarse numerical step, in kT + step2: fine numerical step, in kT + """ + kT = temperature * BOLTZMANN_CONSTANT + + random = np.random.uniform() + + # cumulative Boltzmann + cumulative_boltzmann = lambda e: math.erf(math.sqrt(e)) + + # now we need to numerically invert the cumulative Boltzmann + # kT = 1 for all this math, we'll fix it at the end + trial_energy = -1 + + # scan up to cutoff kT, which should be more than enough + for i in np.arange(0, cutoff, step1): + if cumulative_boltzmann(i) > random: + trial_energy = i - step1 + break + + if trial_energy == -1: + return cutoff * kT + + # retry in smaller increments + for i in np.arange(trial_energy, trial_energy+step1, step2): + if cumulative_boltzmann(i) > random: + trial_energy = i - step2 + break + + return trial_energy * kT diff --git a/build/lib/cctk/si_file.py b/build/lib/cctk/si_file.py new file mode 100644 index 0000000..985d845 --- /dev/null +++ b/build/lib/cctk/si_file.py @@ -0,0 +1,89 @@ +import cctk +from cctk.helper_functions import get_symbol + + +class SIFile(cctk.File): + """ + Class representing Supporting Information files. + + Attributes: + titles (list of str): title of each molecule + ensemble (cctk.Ensemble): ``cctk.Ensemble`` of molecules to print + """ + + def __init__(self, ensemble, titles): + if ensemble and isinstance(ensemble, cctk.Ensemble): + self.ensemble = ensemble + else: + raise ValueError(f"invalid ensemble {ensemble}!") + + assert len(titles) == len(ensemble) + self.titles = titles + + def write_file(self, filename, write_xyz=False, write_dir=None): + """ + Write an SI file. 
+ + Args: + filename (str): path to the new file + write_xyz (Bool): whether or not to write ``.xyz`` files for each molecule + write_dir (str): where to write them too + """ + first = True + for title, (molecule, properties) in zip(self.titles, self.ensemble.items()): + assert isinstance(molecule, cctk.Molecule), "molecule is not a valid Molecule object!" + + text = f"{title}\n" + for key, value in generate_info(molecule, properties).items(): + text += f"{key}:\t{value}\n" + + text += f"Cartesian Coordinates (Å):\n" + for index, Z in enumerate(molecule.atomic_numbers, start=1): + line = molecule.get_vector(index) + text += f"{get_symbol(Z):>2} {line[0]:>13.6f} {line[1]:>13.6f} {line[2]:>13.8f}\n" + + text += "\n" + + if write_xyz and write_dir is not None: + cctk.XYZFile.write_molecule_to_file(f"{write_dir}/{title}.xyz", molecule) + + if first: + super().write_file(filename, text) + first = False + else: + super().append_to_file(filename, text) + + +def generate_info(molecule, properties): + info = { + "Number of Atoms": molecule.num_atoms(), + "Stoichiometry": molecule.formula(), + "Charge": molecule.charge, + "Multiplicity": molecule.multiplicity, + } + + # for now manually handling route card and imaginaries, which typically aren't linked to cctk.Molecule. + # long-term would be good to manually pass an extra info_dict from the calling environment + # to avoid these ad hoc carveouts. 
ccw 3.8.21 + + if "route_card" in properties: + info["Route Card"] = properties["route_card"] + + if "imaginaries" in properties: + info["Imaginary Frequencies (cm-1)"] = properties["imaginaries"] + else: + info["Imaginary Frequencies (cm-1)"] = "None" + + if "energy" in properties: + info["Energy"] = properties["energy"] + if "enthalpy" in properties: + info["Enthalpy"] = properties["enthalpy"] + if "gibbs_free_energy" in properties: + info["Gibbs Free Energy"] = properties["gibbs_free_energy"] + if "quasiharmonic_gibbs_free_energy" in properties: + info["Gibbs Free Energy (Quasiharmonic Correction)"] = properties["quasiharmonic_gibbs_free_energy"] + if "dipole_moment" in properties: + info["Dipole Moment (Debye)"] = properties["dipole_moment"] + + return info + diff --git a/build/lib/cctk/topology.py b/build/lib/cctk/topology.py new file mode 100644 index 0000000..943728a --- /dev/null +++ b/build/lib/cctk/topology.py @@ -0,0 +1,267 @@ +""" +Functions to handle 3D topology, graph structure, etc of ``Molecule`` objects. + +Moved out of ``cctk.Molecule`` because the file was getting unwieldy. +""" + +import numpy as np +import networkx as nx +import copy + +from cctk.helper_functions import ( + compute_chirality, +) + +def are_isomorphic(mol1, mol2, return_ordering=False): + """ + Checks if two molecules are isomorphic (by comparing bond graphs and atomic numbers - not bond orders!). + + Args: + mol1 (cctk.Molecule): + mol2 (cctk.Molecule): + return_ordering (Bool): if True, also returns a mapping between atomic numbers + + Returns: + Boolean denoting if the molecules are isomorphic + (optional) mapping list + """ + assert mol1.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" + assert mol2.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" 
+ + mol1._add_atomic_numbers_to_nodes() + mol2._add_atomic_numbers_to_nodes() + + nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0) + match = nx.algorithms.isomorphism.GraphMatcher(mol1.bonds, mol2.bonds, node_match=nm) + + if match.is_isomorphic(): + if return_ordering: + new_ordering = [match.mapping[x] for x in range(1, mol1.num_atoms() + 1)] + return True, new_ordering + else: + return True + else: + if return_ordering: + return False, None + else: + return False + +def flip_meso_rings(mol, atoms): + """ + Returns a list of permuted molecules with various ``meso`` rings renumbered. + + Args: + mol (cctk.Molecule): molecule of interest + atoms (list): atomic numbers of potential atoms to consider + + Returns: + list of ``Molecule`` objects + """ + #### get all rings in graph + returns = [copy.deepcopy(mol)] + for center in atoms: + cycles = nx.cycle_basis(mol.bonds, root=center) + for cycle in cycles: + #### get the correct ring + if center not in cycle: + continue + + #### reorder to put ``center`` first + while cycle[0] != center: + # why yes, this /is/ a O(n) solution for reordering a list. why do you ask? 
+ cycle = cycle[1:] + cycle[0:1] + assert cycle[0] == center, "graph reorder failed" + + #### create fragments + frag1 = [cycle.pop(1)] + frag2 = [cycle.pop(-1)] + while len(cycle) > 2: + frag1.append(cycle.pop(1)) + frag2.append(cycle.pop(-1)) + + #### cut fragment bonds, depending on if we have even- or odd-numbered ring + new_returns = [] + for mol in returns: + cpy = copy.deepcopy(mol) + cpy.remove_bond(frag1[0], cycle[0]) + cpy.remove_bond(frag2[0], cycle[0]) + if len(cycle) == 1: + cpy.remove_bond(frag1[-1], frag2[-1]) + elif len(cycle) == 2: + cpy.remove_bond(frag1[-1], cycle[-1]) + cpy.remove_bond(frag2[-1], cycle[-1]) + + #### generate graphs + graph1 = None + graph2 = None + fragments = nx.connected_components(cpy.bonds) + for fragment in fragments: + if frag1[0] in fragment: + graph1 = cpy.bonds.subgraph(fragment) + if frag2[0] in fragment: + graph2 = cpy.bonds.subgraph(fragment) + + assert isinstance(graph1, nx.Graph), "can't find graph 1" + assert isinstance(graph2, nx.Graph), "can't find graph 1" + + #### do our two ring-halves match?? 
if so, we swap them + nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0) + match = nx.algorithms.isomorphism.GraphMatcher(graph1, graph2, node_match=nm) + + if match.is_isomorphic(): + for k,v in match.mapping.items(): + cpy = cpy.swap_atom_numbers(k, v) + + #### redo all the bonds we ablated + if len(cycle) == 1: + cpy.add_bond(frag1[-1], frag2[-1], mol.get_bond_order(frag1[-1], frag2[-1])) + elif len(cycle) == 2: + cpy.add_bond(frag1[-1], cycle[-1], mol.get_bond_order(frag1[-1], cycle[-1])) + cpy.add_bond(frag2[-1], cycle[-1], mol.get_bond_order(frag2[-1], cycle[-1])) + cpy.add_bond(frag1[0], cycle[0], mol.get_bond_order(frag1[0], cycle[0])) + cpy.add_bond(frag2[0], cycle[0], mol.get_bond_order(frag2[0], cycle[0])) + + new_returns.append(cpy) + returns = returns + new_returns + return returns + +def exchange_identical_substituents(mol, center, self_permutations=None): + """ + Replace homotopic/enantiotopic/diastereotopic substituents about a single atom. + + If a list of permuted ``Molecule`` objects is passed (as ``self_permutations``), then this code will apply this to each member and return a list. + + Args: + mol (cctk.Molecule): molecule of interest + center (integer): atomic number of atom to swap substituents around + self_permutations (list of Molecules): optional list of starting ``Molecule`` objects + + Returns: + ``Molecule`` object (or list if ``self_permutations`` is not ``None``) + """ + assert mol.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" 
+ mol._add_atomic_numbers_to_nodes() + neighbors = list(mol.bonds[center]) + + returns = [copy.deepcopy(mol)] + if self_permutations is not None: + returns = self_permutations + + + for i in range(len(neighbors)): + for j in range(i+1, len(neighbors)): + try: + _, frag1 = mol._get_bond_fragments(center, neighbors[i]) + _, frag2 = mol._get_bond_fragments(center, neighbors[j]) + + graph1 = mol.bonds.subgraph(frag1) + graph2 = mol.bonds.subgraph(frag2) + + nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0) + match = nx.algorithms.isomorphism.GraphMatcher(graph1, graph2, node_match=nm) + if match.is_isomorphic(): + for m in returns: + new_mol = copy.deepcopy(m) + for k,v in match.mapping.items(): + new_mol = new_mol.swap_atom_numbers(k, v) + if self_permutations is None: + return new_mol + + returns.append(new_mol) + + except ValueError as e: + pass # probably indicates a cycle + + if self_permutations is None: + raise ValueError("could not find substituents to switch") + else: + return returns + +def get_chirality_report(mol, centers=None): + """ + Computes chirality at stereogenic centers. + + Args: + mol (cctk.Molecule): molecule of interest + centers (list): atomic numbers to check. defaults to all centers with 4+ substituents. + + Returns: + dict with centers as keys and ±1 as values + """ + if centers is None: + centers = get_stereogenic_centers(mol) + assert isinstance(centers, list) + + results = {} + for center in centers: + neighbors = list(mol.bonds[center]) + neighbors.sort() + assert len(neighbors) >= 4, f"atom {center} has fewer than 4 neighbors ({neighbors})!" + results[center] = compute_chirality(*[mol.get_vector(n, center) for n in neighbors]) + + return results + +def get_stereogenic_centers(mol): + """ + Returns every atom making 4 or more bonds. A bit misleading, since diastereotopic protons/meso protons are also counted. 
+ """ + assert mol.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" + num_neighbors = np.array([len(list(mol.bonds[x])) for x in range(1, mol.num_atoms() + 1)]) + return [int(x) for x in list(np.ravel(np.argwhere(num_neighbors >= 4)) + 1)] # love me some off-by-one indexing errors + +def get_exchangeable_centers(mol): + """ + Returns all atoms making 4 or more bonds that have two isomorphic substituents, i.e. where renumbering could be broken. + """ + centers = get_stereogenic_centers(mol) + exchangeable_centers = [] + for center in centers: + try: + exchange_identical_substituents(mol, center) + exchangeable_centers.append(center) + continue + except Exception as e: + pass + + mols = flip_meso_rings(mol, atoms=[center]) + if len(mols) > 1: + exchangeable_centers.append(center) + + return exchangeable_centers + +def find_group(mol, group): + """ + Finds instances of ``group`` within ``mol``. + + Args: + mol (cctk.Molecule): molecule to search within + group (cctk.Group): group to search for + + Returns: + list of dictionaries mapping from molecule atomic numbers to group atomic numbers + """ + assert mol.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" + assert group.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" 
+ + mol._add_atomic_numbers_to_nodes() + group._add_atomic_numbers_to_nodes() + group_map = group.map_from_truncated() + group.remove_atom(group.attach_to) + + nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0) + match = nx.algorithms.isomorphism.GraphMatcher(mol.bonds, group.bonds, node_match=nm) + + #### need to only find unique mappings - combinations, not permutations + mappings = [] + for sg in match.subgraph_isomorphisms_iter(): + unique = True + for m in mappings: + if set(m.keys()) == set(sg.keys()): + unique = False + break + if unique: + mappings.append(sg) + + composition = [{k: group_map[v] for k, v in m.items()} for m in mappings] + return composition + diff --git a/build/lib/cctk/vibrational_mode.py b/build/lib/cctk/vibrational_mode.py new file mode 100644 index 0000000..28dfaab --- /dev/null +++ b/build/lib/cctk/vibrational_mode.py @@ -0,0 +1,217 @@ +import math +import numpy as np + +import cctk +from cctk.quasiclassical import get_hermite_polynomial + +# constants +MAX_QHO_LEVEL = 10000 +MIN_FREQUENCY = 2 +MIN_TEMPERATURE = 10 +MAX_ZPE_RATIO = 0.999999 + +BOLTZMANN_CONSTANT = 0.001985875 # kcal/mol•K + +class VibrationalMode: + """ + Most code adapted from ``jprogdyn``. Displacements will be very low accuracy unless ``freq=hpmodes`` is enabled. + + Values from Gaussian, for now: see https://gaussian.com/vib/. 
+ + Attributes: + frequency (float): frequency, in cm-1 + force_constant (float): force constant, in kcal/mol per Å + reduced_mass (float): mass, in amus + intensity (float): IR intensity + displacements (cctk.OneIndexedArray): atom displacements + velocities (cctk.OneIndexedArray): atom velocities + + """ + def __init__(self, frequency, force_constant, reduced_mass, intensity, displacements): + assert isinstance(frequency, float) + self.frequency = frequency + + assert isinstance(force_constant, float) + self.force_constant = force_constant + + assert isinstance(reduced_mass, float) + self.reduced_mass = reduced_mass + + assert isinstance(intensity, float) + self.intensity = intensity + + assert isinstance(displacements, cctk.OneIndexedArray) + self.displacements = displacements + + def __str__(self): + return f"Vibrational mode ({self.frequency:.2f} cm-1, {self.reduced_mass:.2f} amus, {self.force_constant:.2f} kcal/mol Å**-2)" + + def __repr__(self): + return f"Vibrational mode ({self.frequency:.2f} cm-1, {self.reduced_mass:.2f} amus, {self.force_constant:.2f} kcal/mol Å**-2)" + + def choose_level(self, temperature=298): + if temperature < MIN_TEMPERATURE: + return 0 + + # zpe_ratio is probability of being in level i vs level i+1, by quantum harmonic oscillator + zpe_ratio = math.exp( -2 * self.energy() / (BOLTZMANN_CONSTANT * temperature)) + if zpe_ratio > MAX_ZPE_RATIO: + zpe_ratio = MAX_ZPE_RATIO + + # probability of being in state 0 is equal to 1 - zpe_ratio + # 1 = P(0) + P(1) + P(2) + ... = P + P * zpe_ratio + P * zpe_ratio ** 2 + ... + # 1 = P(0) / (1 - zpe_ratio) bc geometric series + P = 1.0 - zpe_ratio + + random = np.random.uniform() + level = 0 + while level < MAX_QHO_LEVEL: + if random < P: + return level + else: + P += P * zpe_ratio + level += 1 + + return level + + def energy(self, level=0): + """ + Calculate energy as a function of level. By default returns zero-point energy (level = 0). 
+ + Args: + level (int): which vibrational level the mode is in + + Returns: + energy (kcal/mol) + """ + assert isinstance(level, int) and level >= 0, "need positive integer for vibrational level" + + freq = self.frequency + if freq < MIN_FREQUENCY: + freq = MIN_FREQUENCY + + # 0.5 * h * c * frequency (c in cm/s bc wavenumbers) + # 0.5 * (6.626 * 10**-34) * (3 * 10**10) * (6.026 * 10**23) / 4184) = 0.0014305 + zpe = 0.0014305 * freq + return zpe * (2 * level + 1) + + def random_displacement(self, energy=None, level=0, method="quasiclassical", max_attempts=1e5): + """ + Args: + energy (float): energy of mode (for classical case) + method (str): "quasiclassical" or "classical" + level (int): which vibrational level + max_attempts (int): how many tries you get + + Returns: + shift + """ + if method == "quasiclassical": + min_val = 0 + max_val = self.quantum_distribution_max(level) + max_x = self.classical_turning_point() + + attempts = 0 + while attempts < max_attempts: + x = np.random.uniform(-1 * max_x, max_x) + p = self.quantum_distribution_value(x, level) + + y = np.random.uniform(min_val, max_val) + if y < p: + return x + else: + attempts += 1 + + raise ValueError("max_attempts exceeded - can't get a proper initialization for this mode!") + elif method == "classical": + assert energy is not None, "need energy for classical displacement" + min_val = self.classical_distribution_value(0) + max_x = self.classical_turning_point(energy=energy) + max_val = self.classical_distribution_value(max_x) + + attempts = 0 + while attempts < max_attempts: + x = np.random.uniform(-1*max_x, max_x) + p = self.classical_distribution_value(max_x) + + y = np.random.uniform(min_val, max_val) + if y < p: + return x + else: + attempts += 1 + else: + raise ValueError(f"invalid method {method} - only ``quasiclassical`` and ``classical`` implemented currently!") + + raise ValueError("Max attempts exceeded!") + + def quantum_distribution_value(self, x, level=0): + """ + Calculate psi**2 for 
quantum harmonic oscillator for a given shift in Å. + + Args: + x (float): shift in Å + level (int): vibrational level + """ + assert isinstance(level, int) and level >= 0, "need positive integer for vibrational level" + + freq = self.frequency + if freq < MIN_FREQUENCY: + freq = MIN_FREQUENCY + + n = level # brevity is the soul of wit + H = get_hermite_polynomial(n) + + # following https://github.com/ekwan/Jprogdyn/blob/master/src/main/java/edu/harvard/chemistry/ekwan/Jprogdyn/HarmonicOscillatorDistribution.java, line 109 + # 4 * pi * 3 * 10**8 / (1000 * 10**20 * 6.022 * 10**23 * 6.626 * 10^-34) = 0.000094411, take it or leave it + omega_term = 9.4411e-5 * self.reduced_mass * freq + val = math.sqrt(omega_term) * math.exp(-1 * omega_term * math.pi * x ** 2 ) * (H(math.sqrt(omega_term * math.pi) * x) ** 2) / (2 ** n * math.factorial(n)) + return val + + def quantum_distribution_max(self, level=0, num_pts=1e4): + """ + Returns the maximum value of psi**2 for the quantum harmonic oscillator at a given level. + """ + assert isinstance(level, int) and level >= 0, "need positive integer for vibrational level" + + if level == 0: + return self.quantum_distribution_value(0) + + max_x = self.classical_turning_point() + + # there is certainly a better way to do this + max_p = 0 + for x in np.linspace(0, max_x, int(num_pts)): + p = self.quantum_distribution_value(x, level) + if p > max_p: + max_p = p + + return max_p + + def classical_distribution_value(self, x): + """ + Returns the value of the classical distribution at the specified ``x`` value. + """ + max_x = self.classical_turning_point() + assert (x <= max_x) and (x >= -1*max_x), "x must be in [-max_x, max_x]" + return 1/(math.pi * math.sqrt(max_x**2 - x**2)) + + def classical_turning_point(self, energy=None): + """ + Returns the maximum allowed shift based on modelling the mode as a classical harmonic oscillator (e.g. the point where potential energy is maximum). 
+ + Args: + energy (float): energy of mode + level (int): level to compute energy for quantum harmonic oscillator + """ + if energy is None: + energy = self.energy() + else: + assert energy > 0, "cannot request turning point for 0 energy!" + + return math.sqrt(2 * energy / self.force_constant) + + def to_string(self): + ... + + def from_string(self): + ... diff --git a/build/lib/cctk/xyz_file.py b/build/lib/cctk/xyz_file.py new file mode 100644 index 0000000..8a1a3aa --- /dev/null +++ b/build/lib/cctk/xyz_file.py @@ -0,0 +1,190 @@ +import re, warnings +import numpy as np + +import cctk +from cctk.helper_functions import get_symbol, get_number + + +class XYZFile(cctk.File): + """ + Class representing plain ``.xyz`` files. + + Attributes: + titles (list of str): the title or titles from the file + ensemble (Ensemble): `Ensemble` instance + molecule (Molecule): `Molecule` instance representing the first molecule in the file. deprecated, but present for backwards compatibility. + """ + + def __init__(self, ensemble, titles): + assert isinstance(ensemble, cctk.Ensemble), "ensemble must be cctk.Ensemble" + self.ensemble = ensemble + + # backwards compatibility + self.molecule = ensemble.molecule_list()[0] + + assert isinstance(titles, list), "title must be list" + self.titles = titles + + def __getattribute__(self, name): + if name == "molecule": + warnings.warn("XYZFile attribute ``molecule`` will be removed in upcoming releases of cctk. Use ``ensemble`` attribute instead!", DeprecationWarning, stacklevel=2) + return cctk.File.__getattribute__(self, name) + + @classmethod + def read_file(cls, filename, charge=0, multiplicity=1, conformational=False): + """ + Factory method to create new XYZFile instances. 
+ + Arguments: + filename (str): path to ``.xyz`` file + charge (int): charge of resultant molecule + multiplicity (int): multiplicity of resultant molecule + conformational (bool): whether or not it's a conformational ensemble + """ + assert isinstance(charge, int), "charge must be integer" + assert isinstance(multiplicity, int), "multiplicity must be integer" + assert multiplicity > 0, "multiplicity must be a positive integer" + + ensemble = cctk.Ensemble() + if conformational: + ensemble = cctk.ConformationalEnsemble() + titles = list() + + lines = super().read_file(filename) + current_lines = list() + for line in lines: + if re.search(r"^\s*\d+$", line) and len(current_lines) > 2: + if len(current_lines) > 0: + t, m = cls.mol_from_lines(current_lines, charge=charge, multiplicity=multiplicity) + ensemble.add_molecule(m) + titles.append(t) + current_lines = list() + current_lines.append(line) + + # catch the last molecule + if len(current_lines) > 0: + t, m = cls.mol_from_lines(current_lines, charge=charge, multiplicity=multiplicity) + ensemble.add_molecule(m) + titles.append(t) + + return XYZFile(ensemble, titles) + + @classmethod + def mol_from_lines(cls, lines, charge=0, multiplicity=1): + num_atoms = 0 + try: + num_atoms = int(lines[0]) + except: + raise ValueError("can't get the number of atoms from the first line!") + + title = lines[1] + + atomic_numbers = np.zeros(shape=num_atoms, dtype=np.int8) + geometry = np.zeros(shape=(num_atoms, 3)) + + for index, line in enumerate(lines[2:]): + # ignore blank lines + if len(line.strip()) == 0: + continue + + pieces = list(filter(None, line.split(" "))) + try: + if re.match("[0-9]", pieces[0]): + atomic_numbers[index] = int(pieces[0]) + elif re.match("([A-Za-z])+([0-9])+", pieces[0]): + # mdtraj writes in this format, for some reason + m = re.match("([A-Za-z])+([0-9])+", pieces[0]) + atomic_numbers[index] = int(get_number(m.group(1))) + else: + atomic_numbers[index] = int(get_number(pieces[0])) + geometry[index][0] 
= float(pieces[1]) + geometry[index][1] = float(pieces[2]) + geometry[index][2] = float(pieces[3]) + except: + raise ValueError(f"can't parse line {index+2}: {line}") + + assert num_atoms == len(atomic_numbers), "wrong number of atoms!" + molecule = cctk.Molecule(atomic_numbers, geometry, charge=charge, multiplicity=multiplicity) + return title, molecule + + @classmethod + def write_molecule_to_file(cls, filename, molecule, title="title", append=False): + """ + Write an ``.xyz`` file, using object attributes. + + Args: + filename (str): path to the new file + molecule (Molecule): molecule to write + title (str): title of file + append (Bool): whether or not to append to file + """ + assert isinstance(molecule, cctk.Molecule), "molecule is not a valid Molecule object!" + + text = f"{molecule.num_atoms()}\n" + text += f"{title}\n" + + for index, Z in enumerate(molecule.atomic_numbers, start=1): + line = molecule.get_vector(index) + text += f"{get_symbol(Z):>2} {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n" + + if append: + super().append_to_file(filename, text) + else: + super().write_file(filename, text) + + def write_file(self, filename, idx=-1): + """ + Write an ``.xyz`` file, using object attributes. + + Args: + idx (int): the index of the molecule to write + """ + assert isinstance(idx, int), "idx must be int" + self.write_molecule_to_file(filename, self.get_molecule(idx), title=self.titles[idx]) + + @classmethod + def read_trajectory(cls, filename, **kwargs): + """ + Post refactoring, just an alias for ``XYZFile.read_file()``. + """ + return cls.read_file(filename, **kwargs) + + @classmethod + def read_ensemble(cls, filename, **kwargs): + """ + Post refactoring, just an alias for ``XYZFile.read_file()``. + """ + return cls.read_file(filename, **kwargs) + + @classmethod + def write_ensemble_to_file(cls, filename, ensemble, title=None): + """ + Write a ``cctk.Ensemble`` to a single ``.xyz`` file. Can be viewed in MOLDEN. 
+ """ + assert isinstance(ensemble, cctk.Ensemble), f"ensemble {ensemble} is not a cctk.Ensemble" + + if title is None: + title = "title" + if isinstance(title, str): + title = [title for _ in range(len(ensemble))] + assert len(title) == len(ensemble) + + for idx, (molecule, title) in enumerate(zip(ensemble._items, title)): + if idx == 0: + cls.write_molecule_to_file(filename, molecule, title=title, append=False) + else: + cls.write_molecule_to_file(filename, molecule, title=title, append=True) + + def get_molecule(self, num=None): + """ + Returns a given molecule. + + If ``num`` is specified, returns ``self.ensemble.molecule_list()[num]`` + """ + # some methods pass num=None, which overrides setting the default above + if num is None: + num = -1 + assert isinstance(num, int), "num must be int" + return self.ensemble.molecule_list()[num] + + diff --git a/cctk/molecule.py b/cctk/molecule.py index 2ccd3d3..50f8597 100644 --- a/cctk/molecule.py +++ b/cctk/molecule.py @@ -9,6 +9,7 @@ from cctk.helper_functions import ( get_symbol, get_number, + get_avg_mass, compute_rotation_matrix, compute_distance_between, compute_angle_between, @@ -1830,3 +1831,45 @@ def coulomb_analysis(self, atoms1, atoms2, charges): energy += Q[i][j] / R[i][j] return energy * 627.509 # convert to kcal/mol + + def center_of_mass(self): + """ + Returns the center-of-mass of the molecule, as a ``np.array``. + """ + masses = cctk.OneIndexedArray([get_avg_mass(z) for z in self.atomic_numbers]).reshape(-1,1) + return np.sum(masses * self.geometry, axis=0) / np.sum(masses) + + def principal_axes_of_rotation(self): + """ + Compute principal axes of rotation and corresponding moments of inertia. + + See Jprogdyn, RotationalBoltzmann, lines 48–115. 
+ + Returns: + moments of inertia (3-element np.array) - some may be zero + axes of rotation (3 x 3 np.array, one eigenvector per column) + """ + # move everything to the center of mass (on a copy, let's not get too crazy here) + com = self.center_of_mass() + positions = copy.deepcopy(self.geometry.view(np.ndarray)) + positions += -1 * com + + masses = np.array([get_avg_mass(z) for z in self.atomic_numbers]).reshape(-1,1) + np.testing.assert_allclose(np.sum(masses * positions, axis=0) / np.sum(masses), 0, atol=0.00001) + + # build up mass moment of inertia tensor + Ixx, Ixy, Ixz, Iyy, Iyz, Izz = 0, 0, 0, 0, 0, 0 + for mass, position in zip(masses, positions): + Ixx += mass * (position[2]*position[2] + position[1]*position[1]) + Iyy += mass * (position[0]*position[0] + position[2]*position[2]) + Izz += mass * (position[0]*position[0] + position[1]*position[1]) + Ixy -= mass * position[0] * position[1] + Ixz -= mass * position[0] * position[2] + Iyz -= mass * position[1] * position[2] + + I = np.array([[Ixx, Ixy, Ixz], [Ixy, Iyy, Iyz], [Ixz, Iyz, Izz]]).reshape(3,3) + + # now we do an eigendecomposition on that tensor + return np.linalg.eigh(I) + + diff --git a/cctk/quasiclassical.py b/cctk/quasiclassical.py index 1b09eb1..2bcbc27 100644 --- a/cctk/quasiclassical.py +++ b/cctk/quasiclassical.py @@ -12,9 +12,10 @@ """ AMU_A2_FS2_PER_KCAL_MOL = 0.0004184 -BOLTZMANN_CONSTANT = 0.001985875 # kcal/mol•Kn +BOLTZMANN_CONSTANT = 0.001985875 # kcal/mol•K +TEMP_TO_eV = 8.61733238e-5 # eV/K -def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities=False, which="quasiclassical", mode_options=None): +def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities=False, which="quasiclassical", mode_options=None, do_rotation=True): """ Perturbs molecule by treating each mode as a quantum harmonic oscillator and sampling from the distribution appropriate to the temperature.
@@ -31,6 +32,7 @@ def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities
             val (dict):
                 velocity (str): one of "positive", "negative", "random", "zero"
                 displacement (bool): whether or not to displace
+        do_rotation (bool): whether or not to apply classical rotational initialization.
 
     Returns:
         new ``cctk.Molecule`` object
@@ -64,6 +66,37 @@ def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities
         for idx in range(1,molecule.num_atoms()+1):
             velocities[idx] += mode_velocity * mode.displacements[idx]
 
+    if do_rotation:
+        moments, axes_of_rotation = mol.principal_axes_of_rotation()
+        omega_axis1, omega_axis2, omega_axis3 = 0, 0, 0
+
+        # draw an energy for each principal axis and convert it to an angular frequency
+        # via E = (1/2) * I * omega**2, using that axis's own moment; the sign is randomized
+        # energy in kcal/mol, unlike Jprogdyn
+        energy_axis1 = random_boltzmann_energy(temperature)
+        if moments[0] > 0:
+            omega_axis1 = (1 if random.random() < 0.5 else -1) * np.sqrt(2*energy_axis1 / (moments[0]*AMU_A2_FS2_PER_KCAL_MOL))
+
+        energy_axis2 = random_boltzmann_energy(temperature)
+        if moments[1] > 0:
+            omega_axis2 = (1 if random.random() < 0.5 else -1) * np.sqrt(2*energy_axis2 / (moments[1]*AMU_A2_FS2_PER_KCAL_MOL))
+
+        energy_axis3 = random_boltzmann_energy(temperature)
+        if moments[2] > 0:
+            omega_axis3 = (1 if random.random() < 0.5 else -1) * np.sqrt(2*energy_axis3 / (moments[2]*AMU_A2_FS2_PER_KCAL_MOL))
+
+        # add energy to total energy counter
+        total += energy_axis1 + energy_axis2 + energy_axis3
+
+        # total angular velocity; eigh() stores eigenvector i in column i, so index columns, not rows
+        omega = omega_axis1*axes_of_rotation[:, 0] + omega_axis2*axes_of_rotation[:, 1] + omega_axis3*axes_of_rotation[:, 2]
+
+        # now turn this into Cartesian velocity for each atom (v = omega x r, r measured from the center of mass)
+        shifted_positions = copy.deepcopy(mol.geometry)
+        shifted_positions -= mol.center_of_mass()
+        for idx in range(1, mol.num_atoms()+1):
+            velocities[idx] += np.cross(omega, shifted_positions[idx])
+
     if return_velocities:
         return mol, total_PE,
total, all_text, velocities else: # backwards compatibility diff --git a/setup.py b/setup.py index 596831c..bbe3938 100644 --- a/setup.py +++ b/setup.py @@ -11,16 +11,17 @@ packages=["cctk", "cctk.data", "cctk.groups"], # include_package_data=True, package_data={"cctk.data": ["*"], "cctk.groups": ["*"],}, - version="v0.2.13", + version="v0.2.14", license="Apache 2.O", description="computational chemistry toolkit", author="Corin Wagen and Eugene Kwan", author_email="corin.wagen@gmail.com", url="https://github.com/ekwan/cctk", - download_url="https://github.com/ekwan/cctk/archive/v0.2.13.tar.gz", + download_url="https://github.com/ekwan/cctk/archive/v0.2.14.tar.gz", install_requires=["numpy", "networkx", "importlib_resources", "scipy", "pyahocorasick", "basis_set_exchange", "pyyaml"], long_description=long_description, long_description_content_type='text/markdown', + python_requires='>=3.6', classifiers=[ "Development Status :: 4 - Beta", "License :: OSI Approved :: Apache Software License", diff --git a/test/static/h2.xyz b/test/static/h2.xyz new file mode 100644 index 0000000..1d25adb --- /dev/null +++ b/test/static/h2.xyz @@ -0,0 +1,4 @@ +2 +HeH +H 0 0 0 +H 0 0 0.77260 diff --git a/test/test_freqs.py b/test/test_freqs.py index 6343553..0cb43ad 100644 --- a/test/test_freqs.py +++ b/test/test_freqs.py @@ -72,7 +72,7 @@ def test_perturb_water(self): 3: {"velocity": "zero"}, } - mol3, e, _, _, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, mode_options=mo) + mol3, e, _, _, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, mode_options=mo, do_rotation=False) self.assertTrue(isinstance(mol3, cctk.Molecule)) self.assertFalse(np.any(v)) # all should be zero, AKA False @@ -81,7 +81,7 @@ def test_perturb_water(self): 2: {"velocity": "positive", "displacement": False}, 3: {"velocity": "positive", "displacement": False}, } - mol4, e, te, text, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, mode_options=mo) + mol4, e, 
te, text, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, mode_options=mo, do_rotation=False)
         self.assertTrue(te - 13.28839457 < 0.00001)
 
         mol5, e, te, text, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, which="classical")
diff --git a/test/test_molecule.py b/test/test_molecule.py
index b3509fb..55ad810 100644
--- a/test/test_molecule.py
+++ b/test/test_molecule.py
@@ -228,5 +228,17 @@ def test_coulomb_analysis(self):
 
         # print(mol.coulomb_analysis(atoms1, atoms2, charges))
 
+    def test_rotation(self):
+        mol = self.load_molecule()
+        m1, a1 = mol.principal_axes_of_rotation()
+        # reference moments [672.04715793, 2908.52501403, 3481.42757748]; compare two-sided so low values fail too
+        self.assertTrue(abs(m1[0] - 672.05) < 0.1)
+        self.assertTrue(abs(m1[1] - 2908.53) < 0.1)
+        self.assertTrue(abs(m1[2] - 3481.43) < 0.1)
+
+        h2 = cctk.XYZFile.read_file("test/static/h2.xyz").get_molecule()
+        m2, a2 = h2.principal_axes_of_rotation()
+        self.assertEqual(m2[0], 0.0) # first moment ought to be zero
+
 if __name__ == '__main__':
     unittest.main()