From 3742529927541b84275e4e13dff3f36407eb2990 Mon Sep 17 00:00:00 2001
From: Corin Wagen <corin.wagen@gmail.com>
Date: Tue, 18 Oct 2022 15:47:06 -0400
Subject: [PATCH] add rotational initialization

---
 build/lib/cctk/__init__.py             |   18 +
 build/lib/cctk/array.py                |  135 ++
 build/lib/cctk/data/__init__.py        |    0
 build/lib/cctk/data/covalent_radii.csv |   97 ++
 build/lib/cctk/data/isotopes.csv       |  355 +++++
 build/lib/cctk/data/vdw_radii.csv      |   76 +
 build/lib/cctk/ensemble.py             |  663 +++++++++
 build/lib/cctk/file.py                 |   81 ++
 build/lib/cctk/gaussian_file.py        |  757 ++++++++++
 build/lib/cctk/group.py                |  277 ++++
 build/lib/cctk/groups/AcH.mol2         |   29 +
 build/lib/cctk/groups/BrH.mol2         |   19 +
 build/lib/cctk/groups/CF3H.mol2        |   25 +
 build/lib/cctk/groups/CHOH.mol2        |   23 +
 build/lib/cctk/groups/ClH.mol2         |   19 +
 build/lib/cctk/groups/EtH.mol2         |   31 +
 build/lib/cctk/groups/FH.mol2          |   19 +
 build/lib/cctk/groups/HCN.mol2         |   21 +
 build/lib/cctk/groups/HCO2Me.mol2      |   31 +
 build/lib/cctk/groups/HNO2.mol2        |   23 +
 build/lib/cctk/groups/IH.mol2          |   19 +
 build/lib/cctk/groups/MeH.mol2         |   25 +
 build/lib/cctk/groups/NH3.mol2         |   23 +
 build/lib/cctk/groups/NHAcH.mol2       |   33 +
 build/lib/cctk/groups/NMe2H.mol2       |   35 +
 build/lib/cctk/groups/OH2.mol2         |   21 +
 build/lib/cctk/groups/OMeH.mol2        |   27 +
 build/lib/cctk/groups/SF5H.mol2        |   29 +
 build/lib/cctk/groups/SO3HH.mol2       |   27 +
 build/lib/cctk/groups/__init__.py      |    0
 build/lib/cctk/groups/iPrH.mol2        |   37 +
 build/lib/cctk/groups/tBuH.mol2        |   43 +
 build/lib/cctk/helper_functions.py     |  708 +++++++++
 build/lib/cctk/lines.py                |  163 +++
 build/lib/cctk/load_groups.py          |  109 ++
 build/lib/cctk/mae_file.py             |  278 ++++
 build/lib/cctk/mol2_file.py            |  351 +++++
 build/lib/cctk/molecule.py             | 1832 ++++++++++++++++++++++++
 build/lib/cctk/optimize.py             |  181 +++
 build/lib/cctk/orca_file.py            |  375 +++++
 build/lib/cctk/parse_gaussian.py       |  768 ++++++++++
 build/lib/cctk/parse_orca.py           |  220 +++
 build/lib/cctk/pdb_file.py             |   56 +
 build/lib/cctk/point_charge.py         |   18 +
 build/lib/cctk/quasiclassical.py       |  214 +++
 build/lib/cctk/si_file.py              |   89 ++
 build/lib/cctk/topology.py             |  267 ++++
 build/lib/cctk/vibrational_mode.py     |  217 +++
 build/lib/cctk/xyz_file.py             |  190 +++
 cctk/molecule.py                       |   43 +
 cctk/quasiclassical.py                 |   37 +-
 setup.py                               |    5 +-
 test/static/h2.xyz                     |    4 +
 test/test_freqs.py                     |    4 +-
 test/test_molecule.py                  |   12 +
 55 files changed, 9153 insertions(+), 6 deletions(-)
 create mode 100644 build/lib/cctk/__init__.py
 create mode 100644 build/lib/cctk/array.py
 create mode 100644 build/lib/cctk/data/__init__.py
 create mode 100644 build/lib/cctk/data/covalent_radii.csv
 create mode 100644 build/lib/cctk/data/isotopes.csv
 create mode 100644 build/lib/cctk/data/vdw_radii.csv
 create mode 100644 build/lib/cctk/ensemble.py
 create mode 100644 build/lib/cctk/file.py
 create mode 100644 build/lib/cctk/gaussian_file.py
 create mode 100644 build/lib/cctk/group.py
 create mode 100644 build/lib/cctk/groups/AcH.mol2
 create mode 100644 build/lib/cctk/groups/BrH.mol2
 create mode 100644 build/lib/cctk/groups/CF3H.mol2
 create mode 100644 build/lib/cctk/groups/CHOH.mol2
 create mode 100644 build/lib/cctk/groups/ClH.mol2
 create mode 100644 build/lib/cctk/groups/EtH.mol2
 create mode 100644 build/lib/cctk/groups/FH.mol2
 create mode 100644 build/lib/cctk/groups/HCN.mol2
 create mode 100644 build/lib/cctk/groups/HCO2Me.mol2
 create mode 100644 build/lib/cctk/groups/HNO2.mol2
 create mode 100644 build/lib/cctk/groups/IH.mol2
 create mode 100644 build/lib/cctk/groups/MeH.mol2
 create mode 100644 build/lib/cctk/groups/NH3.mol2
 create mode 100644 build/lib/cctk/groups/NHAcH.mol2
 create mode 100644 build/lib/cctk/groups/NMe2H.mol2
 create mode 100644 build/lib/cctk/groups/OH2.mol2
 create mode 100644 build/lib/cctk/groups/OMeH.mol2
 create mode 100644 build/lib/cctk/groups/SF5H.mol2
 create mode 100644 build/lib/cctk/groups/SO3HH.mol2
 create mode 100644 build/lib/cctk/groups/__init__.py
 create mode 100644 build/lib/cctk/groups/iPrH.mol2
 create mode 100644 build/lib/cctk/groups/tBuH.mol2
 create mode 100644 build/lib/cctk/helper_functions.py
 create mode 100644 build/lib/cctk/lines.py
 create mode 100644 build/lib/cctk/load_groups.py
 create mode 100644 build/lib/cctk/mae_file.py
 create mode 100644 build/lib/cctk/mol2_file.py
 create mode 100644 build/lib/cctk/molecule.py
 create mode 100644 build/lib/cctk/optimize.py
 create mode 100644 build/lib/cctk/orca_file.py
 create mode 100644 build/lib/cctk/parse_gaussian.py
 create mode 100644 build/lib/cctk/parse_orca.py
 create mode 100644 build/lib/cctk/pdb_file.py
 create mode 100644 build/lib/cctk/point_charge.py
 create mode 100644 build/lib/cctk/quasiclassical.py
 create mode 100644 build/lib/cctk/si_file.py
 create mode 100644 build/lib/cctk/topology.py
 create mode 100644 build/lib/cctk/vibrational_mode.py
 create mode 100644 build/lib/cctk/xyz_file.py
 create mode 100644 test/static/h2.xyz

diff --git a/build/lib/cctk/__init__.py b/build/lib/cctk/__init__.py
new file mode 100644
index 0000000..8662561
--- /dev/null
+++ b/build/lib/cctk/__init__.py
@@ -0,0 +1,18 @@
+from .file import File
+from .lines import LazyLineObject
+from .array import OneIndexedArray
+from .molecule import Molecule
+from .ensemble import Ensemble, ConformationalEnsemble
+from .group import Group
+from .vibrational_mode import VibrationalMode
+
+from .gaussian_file import GaussianJobType, GaussianFile
+from .orca_file import OrcaFile, OrcaJobType
+from .xyz_file import XYZFile
+from .mol2_file import MOL2File
+from .mae_file import MAEFile
+from .pdb_file import PDBFile
+
+from .si_file import SIFile
+
+from .point_charge import PointCharge
diff --git a/build/lib/cctk/array.py b/build/lib/cctk/array.py
new file mode 100644
index 0000000..6f14df4
--- /dev/null
+++ b/build/lib/cctk/array.py
@@ -0,0 +1,135 @@
+import numpy as np
+import copy
+
+class OneIndexedArray(np.ndarray):
+    """
+    Wrapper for ``np.ndarray`` that's indexed from one, not zero, to store atomic numbers and geometries.
+    This only works on 1D or 2D arrays. Additionally, only the first index of a 2D array will be 1-indexed.
+
+    Note that ``enumerate(one_indexed_array)`` will throw ``IndexError`` -- instead, use ``enumerate(one_indexed_array, start=1)``.
+    """
+
+    def __new__(cls, obj, **kwargs):
+        new = np.array(obj, **kwargs).view(cls)
+        return new
+
+    def __getitem__(self, index):
+        index = copy.deepcopy(index)
+        if isinstance(index, slice):
+            if index.start is None:
+                start = 0
+            else:
+                start = index.start - 1
+            if index.stop is None:
+                stop = -1
+            else:
+                stop = index.stop - 1
+            new_index = slice(start, stop, index.step)
+            return super().__getitem__(new_index)
+        elif isinstance(index, int):
+            if index > 0:
+                return super().__getitem__(index-1)
+            elif index == 0:
+                raise IndexError("this is a 1-indexed array: no element 0!")
+            elif index < 0:
+                return super().__getitem__(index)
+        elif (isinstance(index, tuple)) and (len(index) == 2):
+            if index[0] is None:
+                return super().__getitem__((index[0], index[1]))
+            elif index[0] > 0:
+                return super().__getitem__((index[0]-1, index[1]))
+            elif index[0] == 0:
+                raise IndexError("this is a 1-indexed array: no element 0!")
+            elif index[0] < 0:
+                return super().__getitem__((index[0], index[1]))
+        elif (isinstance(index, tuple)) and (len(index) == 1):
+            return self.__getitem__(index[0])
+        elif isinstance(index, np.ndarray):
+            if index.dtype == bool:
+                return super().__getitem__(index)
+            elif index.ndim == 1:
+                index[index >= 1] += -1
+                return super().__getitem__(index)
+            else:
+                index[0][index >= 1] += -1
+                return super().__getitem__(index)
+        elif isinstance(index, list):
+            if isinstance(index[0], bool):
+                return super().__getitem__(index)
+            elif isinstance(index[0], list):
+                if isinstance(index[0][0], bool):
+                    return super().__getitem__(index)
+                for i, v in enumerate(index[0]):
+                    if v >= 1:
+                        index[i] += -1
+                return super().__getitem__(index)
+            else:
+                for i, v in enumerate(index):
+                    if v >= 1:
+                        index[i] += -1
+                return super().__getitem__(index)
+        else:
+            return super().__getitem__(index)
+
+    def __setitem__(self, index, value):
+        index = copy.deepcopy(index)
+        if isinstance(index, int):
+            if index > 0:
+                if self.ndim == 1:
+                    super().__setitem__(index-1, value)
+                elif self.ndim == 2:
+                    super().__setitem__(index, value)
+                else:
+                    raise TypeError("this datatype is only defined for 1D and 2D ndarrays")
+            elif index == 0:
+                raise IndexError("this is a 1-indexed array: no element 0!")
+            elif index < 0:
+                super().__setitem__(index, value)
+        elif (isinstance(index, tuple)) and (len(index) == 2):
+            if index[0] is None:
+                super().__setitem__((index[0], index[1]), value)
+            elif index[0] > 0:
+                super().__setitem__((index[0]-1, index[1]), value)
+            elif index[0] == 0:
+                raise IndexError("this is a 1-indexed array: no element 0!")
+            elif index[0] < 0:
+                super().__setitem__((index[0], index[1]), value)
+        elif (isinstance(index, tuple)) and (len(index) == 1):
+            return self.__setitem__(index[0], value)
+        elif isinstance(index, np.ndarray):
+            if index.dtype == bool:
+                super().__setitem__(index, value)
+            elif index.ndim == 1:
+                index[index >= 1] += -1
+                super().__setitem__(index, value)
+            else:
+                index[0][index >= 1] += -1
+                super().__setitem__(index, value)
+        elif isinstance(index, list):
+            if isinstance(index[0], bool):
+                super().__setitem__(index, value)
+            elif isinstance(index[0], list):
+                if isinstance(index[0][0], bool):
+                    super().__setitem__(index, value)
+                for i, v in enumerate(index[0]):
+                    if v >= 1:
+                        index[i] += -1
+                super().__setitem__(index, value)
+            else:
+                for i, v in enumerate(index):
+                    if v >= 1:
+                        index[i] += -1
+                super().__setitem__(index, value)
+        else:
+            super().__setitem__(index, value)
+#            raise IndexError(f"invalid index {index} for OneIndexedArray")
+
+    def __iter__(self):
+        for idx in range(1,len(self)+1):
+            yield self.__getitem__(idx)
+
+    def __hash__(self):
+        return hash(self.data.tobytes())
+
+    def __str__(self):
+        return self.view(np.ndarray).__str__()
diff --git a/build/lib/cctk/data/__init__.py b/build/lib/cctk/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build/lib/cctk/data/covalent_radii.csv b/build/lib/cctk/data/covalent_radii.csv
new file mode 100644
index 0000000..ae3fe39
--- /dev/null
+++ b/build/lib/cctk/data/covalent_radii.csv
@@ -0,0 +1,97 @@
+Number,Symbol,Radius,StdDev,NumAnalyzed 
+1,H,0.31,5,129
+2,He,0.28
+3,Li,1.28,7,5789
+4,Be,0.96,3,310
+5,B,0.84,3,1770
+6,C,0.76,1,10,000
+7,N,0.71,1,2200
+8,O,0.66,2,10,000
+9,F,0.57,3,10,000
+10,Ne,0.58
+11,Na,1.66,9,1629
+12,Mg,1.41,7,3234
+13,Al,1.21,4,3206
+14,Si,1.11,2,10,000
+15,P,1.07,3,10,000
+16,S,1.05,3,10,000
+17,Cl,1.02,4,1987
+18,Ar,1.06,10,9
+19,K,2.03,12,435
+20,Ca,1.76,10,2647
+21,Sc,1.70,7,32
+22,Ti,1.60,8,231
+23,V,1.53,8,389
+24,Cr,1.39,5,916
+25,Mn,1.61,8,929
+26,Fe,1.52,6,1540
+27,Co,1.50,7,780
+28,Ni,1.24,4,1030
+29,Cu,1.32,4,1149
+30,Zn,1.22,4,443
+31,Ga,1.22,3,1330
+32,Ge,1.20,4,1013
+33,As,1.19,4,2015
+34,Se,1.20,4,1717
+35,Br,1.20,3,2140
+36,Kr,1.16,4,5
+37,Rb,2.20,9,23
+38,Sr,1.95,10,1500
+39,Y,1.90,7,30
+40,Zr,1.75,7,93
+41,Nb,1.64,6,18
+42,Mo,1.54,5,97
+43,Tc,1.47,7,96
+44,Ru,1.46,7,1032
+45,Rh,1.42,7,458
+46,Pd,1.39,6,1892
+47,Ag,1.45,5,1728
+48,Cd,1.44,9,19
+49,In,1.42,5,546
+50,Sn,1.39,4,2999
+51,Sb,1.39,5,609
+52,Te,1.38,4,692
+53,I,1.39,3,451
+54,Xe,1.40,9,2
+55,Cs,2.44,11,24
+56,Ba,2.15,11,3076
+57,La,2.07,8,190
+58,Ce,2.04,9,47
+59,Pr,2.03,7,58
+60,Nd,2.01,6,96
+61,Pm,1.99
+62,Sm,1.98,8,53
+63,Eu,1.98,6,167
+64,Gd,1.96,6,178
+65,Tb,1.94,5,55
+66,Dy,1.92,7,59
+67,Ho,1.92,7,48
+68,Er,1.89,6,66
+69,Tm,1.90,10,15
+70,Yb,1.87,8,122
+71,Lu,1.87,8,61
+72,Hf,1.75,10,53
+73,Ta,1.70,8,88
+74,W,1.62,7,219
+75,Re,1.51,7,476
+76,Os,1.44,4,99
+77,Ir,1.41,6,131
+78,Pt,1.36,5,1768
+79,Au,1.36,6,114
+80,Hg,1.32,5,137
+81,Tl,1.45,7,291
+82,Pb,1.46,5,112
+83,Bi,1.48,4,51
+84,Po,1.40,4,4
+85,At,1.50
+86,Rn,1.50
+87,Fr,2.60
+88,Ra,2.21,2,3
+89,Ac,2.15,1
+90,Th,2.06,6,11
+91,Pa,2.00,1
+92,U,1.96,7,57
+93,Np,1.90,1,22
+94,Pu,1.87,1,9
+95,Am,1.80,6,11
+96,Cm,1.69,3,16
diff --git a/build/lib/cctk/data/isotopes.csv b/build/lib/cctk/data/isotopes.csv
new file mode 100644
index 0000000..5341170
--- /dev/null
+++ b/build/lib/cctk/data/isotopes.csv
@@ -0,0 +1,355 @@
+Symbol,Number,Mass,Abundance
+H,1,1.007825,0.999885
+H,1,2.014102,0.000115
+H,1,3.016049,0.000000
+He,2,3.016029,0.000001
+He,2,4.002603,0.999999
+Li,3,6.015123,0.075900
+Li,3,7.016003,0.924100
+Be,4,9.012183,1.000000
+B,5,10.012937,0.199000
+B,5,11.009305,0.801000
+C,6,12.000000,0.989300
+C,6,13.003355,0.010700
+C,6,14.003242,0.000000
+N,7,14.003074,0.996360
+N,7,15.000109,0.003640
+O,8,15.994915,0.997570
+O,8,16.999132,0.000380
+O,8,17.999160,0.002050
+F,9,18.998403,1.000000
+Ne,10,19.992440,0.904800
+Ne,10,20.993847,0.002700
+Ne,10,21.991385,0.092500
+Na,11,22.989769,1.000000
+Mg,12,23.985042,0.789900
+Mg,12,24.985837,0.100000
+Mg,12,25.982593,0.110100
+Al,13,26.981539,1.000000
+Si,14,27.976927,0.922230
+Si,14,28.976495,0.046850
+Si,14,29.973770,0.030920
+P,15,30.973762,1.000000
+S,16,31.972071,0.949900
+S,16,32.971459,0.007500
+S,16,33.967867,0.042500
+S,16,35.967081,0.000100
+Cl,17,34.968853,0.757600
+Cl,17,36.965903,0.242400
+Ar,18,35.967545,0.003336
+Ar,18,37.962732,0.000629
+Ar,18,39.962383,0.996035
+K,19,38.963706,0.932581
+K,19,39.963998,0.000117
+K,19,40.961825,0.067302
+Ca,20,39.962591,0.969410
+Ca,20,41.958618,0.006470
+Ca,20,42.958766,0.001350
+Ca,20,43.955482,0.020860
+Ca,20,45.953689,0.000040
+Ca,20,47.952523,0.001870
+Sc,21,44.955908,1.000000
+Ti,22,45.952628,0.082500
+Ti,22,46.951759,0.074400
+Ti,22,47.947942,0.737200
+Ti,22,48.947866,0.054100
+Ti,22,49.944787,0.051800
+V,23,49.947156,0.002500
+V,23,50.943957,0.997500
+Cr,24,49.946042,0.043450
+Cr,24,51.940506,0.837890
+Cr,24,52.940648,0.095010
+Cr,24,53.938879,0.023650
+Mn,25,54.938044,1.000000
+Fe,26,53.939609,0.058450
+Fe,26,55.934936,0.917540
+Fe,26,56.935393,0.021190
+Fe,26,57.933274,0.002820
+Co,27,58.933194,1.000000
+Ni,28,57.935342,0.680770
+Ni,28,59.930786,0.262230
+Ni,28,60.931056,0.011399
+Ni,28,61.928345,0.036346
+Ni,28,63.927967,0.009255
+Cu,29,62.929598,0.691500
+Cu,29,64.927790,0.308500
+Zn,30,63.929142,0.491700
+Zn,30,65.926034,0.277300
+Zn,30,66.927128,0.040400
+Zn,30,67.924845,0.184500
+Zn,30,69.925319,0.006100
+Ga,31,68.925573,0.601080
+Ga,31,70.924703,0.398920
+Ge,32,69.924249,0.205700
+Ge,32,71.922076,0.274500
+Ge,32,72.923459,0.077500
+Ge,32,73.921178,0.365000
+Ge,32,75.921403,0.077300
+As,33,74.921595,1.000000
+Se,34,73.922476,0.008900
+Se,34,75.919214,0.093700
+Se,34,76.919914,0.076300
+Se,34,77.917309,0.237700
+Se,34,79.916522,0.496100
+Se,34,81.916699,0.087300
+Br,35,78.918338,0.506900
+Br,35,80.916290,0.493100
+Kr,36,77.920365,0.003550
+Kr,36,79.916378,0.022860
+Kr,36,81.913483,0.115930
+Kr,36,82.914127,0.115000
+Kr,36,83.911498,0.569870
+Kr,36,85.910611,0.172790
+Rb,37,84.911790,0.721700
+Rb,37,86.909181,0.278300
+Sr,38,83.913419,0.005600
+Sr,38,85.909261,0.098600
+Sr,38,86.908878,0.070000
+Sr,38,87.905613,0.825800
+Y,39,88.905840,1.000000
+Zr,40,89.904698,0.514500
+Zr,40,90.905640,0.112200
+Zr,40,91.905035,0.171500
+Zr,40,93.906311,0.173800
+Zr,40,95.908271,0.028000
+Nb,41,92.906373,1.000000
+Mo,42,91.906808,0.145300
+Mo,42,93.905085,0.091500
+Mo,42,94.905839,0.158400
+Mo,42,95.904676,0.166700
+Mo,42,96.906018,0.096000
+Mo,42,97.905405,0.243900
+Mo,42,99.907472,0.098200
+Tc,43,96.906367,0.000000
+Tc,43,97.907212,0.000000
+Tc,43,98.906251,0.000000
+Ru,44,95.907590,0.055400
+Ru,44,97.905287,0.018700
+Ru,44,98.905934,0.127600
+Ru,44,99.904214,0.126000
+Ru,44,100.905577,0.170600
+Ru,44,101.904344,0.315500
+Ru,44,103.905428,0.186200
+Rh,45,102.905498,1.000000
+Pd,46,101.905602,0.010200
+Pd,46,103.904031,0.111400
+Pd,46,104.905080,0.223300
+Pd,46,105.903480,0.273300
+Pd,46,107.903892,0.264600
+Pd,46,109.905172,0.117200
+Ag,47,106.905092,0.518390
+Ag,47,108.904755,0.481610
+Cd,48,105.906460,0.012500
+Cd,48,107.904183,0.008900
+Cd,48,109.903007,0.124900
+Cd,48,110.904183,0.128000
+Cd,48,111.902763,0.241300
+Cd,48,112.904408,0.122200
+Cd,48,113.903365,0.287300
+Cd,48,115.904763,0.074900
+In,49,112.904062,0.042900
+In,49,114.903879,0.957100
+Sn,50,111.904824,0.009700
+Sn,50,113.902783,0.006600
+Sn,50,114.903345,0.003400
+Sn,50,115.901743,0.145400
+Sn,50,116.902954,0.076800
+Sn,50,117.901607,0.242200
+Sn,50,118.903311,0.085900
+Sn,50,119.902202,0.325800
+Sn,50,121.903444,0.046300
+Sn,50,123.905277,0.057900
+Sb,51,120.903812,0.572100
+Sb,51,122.904213,0.427900
+Te,52,119.904059,0.000900
+Te,52,121.903043,0.025500
+Te,52,122.904270,0.008900
+Te,52,123.902817,0.047400
+Te,52,124.904430,0.070700
+Te,52,125.903311,0.188400
+Te,52,127.904461,0.317400
+Te,52,129.906223,0.340800
+I,53,126.904472,1.000000
+Xe,54,123.905892,0.000952
+Xe,54,125.904298,0.000890
+Xe,54,127.903531,0.019102
+Xe,54,128.904781,0.264006
+Xe,54,129.903509,0.040710
+Xe,54,130.905084,0.212324
+Xe,54,131.904155,0.269086
+Xe,54,133.905395,0.104357
+Xe,54,135.907214,0.088573
+Cs,55,132.905452,1.000000
+Ba,56,129.906321,0.001060
+Ba,56,131.905061,0.001010
+Ba,56,133.904508,0.024170
+Ba,56,134.905688,0.065920
+Ba,56,135.904576,0.078540
+Ba,56,136.905827,0.112320
+Ba,56,137.905247,0.716980
+La,57,137.907115,0.000888
+La,57,138.906356,0.999112
+Ce,58,135.907129,0.001850
+Ce,58,137.905991,0.002510
+Ce,58,139.905443,0.884500
+Ce,58,141.909250,0.111140
+Pr,59,140.907658,1.000000
+Nd,60,141.907729,0.271520
+Nd,60,142.909820,0.121740
+Nd,60,143.910093,0.237980
+Nd,60,144.912579,0.082930
+Nd,60,145.913123,0.171890
+Nd,60,147.916899,0.057560
+Nd,60,149.920902,0.056380
+Pm,61,144.912756,0.000000
+Pm,61,146.915145,0.000000
+Sm,62,143.912006,0.030700
+Sm,62,146.914904,0.149900
+Sm,62,147.914829,0.112400
+Sm,62,148.917192,0.138200
+Sm,62,149.917283,0.073800
+Sm,62,151.919740,0.267500
+Sm,62,153.922217,0.227500
+Eu,63,150.919858,0.478100
+Eu,63,152.921238,0.521900
+Gd,64,151.919800,0.002000
+Gd,64,153.920874,0.021800
+Gd,64,154.922630,0.148000
+Gd,64,155.922131,0.204700
+Gd,64,156.923969,0.156500
+Gd,64,157.924112,0.248400
+Gd,64,159.927062,0.218600
+Tb,65,158.925355,1.000000
+Dy,66,155.924285,0.000560
+Dy,66,157.924416,0.000950
+Dy,66,159.925205,0.023290
+Dy,66,160.926941,0.188890
+Dy,66,161.926806,0.254750
+Dy,66,162.928738,0.248960
+Dy,66,163.929182,0.282600
+Ho,67,164.930329,1.000000
+Er,68,161.928788,0.001390
+Er,68,163.929209,0.016010
+Er,68,165.930299,0.335030
+Er,68,166.932055,0.228690
+Er,68,167.932377,0.269780
+Er,68,169.935470,0.149100
+Tm,69,168.934218,1.000000
+Yb,70,167.933890,0.001230
+Yb,70,169.934766,0.029820
+Yb,70,170.936330,0.140900
+Yb,70,171.936386,0.216800
+Yb,70,172.938215,0.161030
+Yb,70,173.938866,0.320260
+Yb,70,175.942576,0.129960
+Lu,71,174.940775,0.974010
+Lu,71,175.942690,0.025990
+Hf,72,173.940046,0.001600
+Hf,72,175.941408,0.052600
+Hf,72,176.943228,0.186000
+Hf,72,177.943706,0.272800
+Hf,72,178.945823,0.136200
+Hf,72,179.946557,0.350800
+Ta,73,179.947465,0.000120
+Ta,73,180.947996,0.999880
+W,74,179.946711,0.001200
+W,74,181.948204,0.265000
+W,74,182.950223,0.143100
+W,74,183.950931,0.306400
+W,74,185.954363,0.284300
+Re,75,184.952955,0.374000
+Re,75,186.955750,0.626000
+Os,76,183.952488,0.000200
+Os,76,185.953835,0.015900
+Os,76,186.955747,0.019600
+Os,76,187.955835,0.132400
+Os,76,188.958144,0.161500
+Os,76,189.958444,0.262600
+Os,76,191.961477,0.407800
+Ir,77,190.960589,0.373000
+Ir,77,192.962922,0.627000
+Pt,78,189.959930,0.000120
+Pt,78,191.961039,0.007820
+Pt,78,193.962681,0.328600
+Pt,78,194.964792,0.337800
+Pt,78,195.964952,0.252100
+Pt,78,197.967895,0.073560
+Au,79,196.966569,1.000000
+Hg,80,195.965833,0.001500
+Hg,80,197.966769,0.099700
+Hg,80,198.968281,0.168700
+Hg,80,199.968327,0.231000
+Hg,80,200.970303,0.131800
+Hg,80,201.970643,0.298600
+Hg,80,203.973494,0.068700
+Tl,81,202.972345,0.295200
+Tl,81,204.974428,0.704800
+Pb,82,203.973044,0.014000
+Pb,82,205.974466,0.241000
+Pb,82,206.975897,0.221000
+Pb,82,207.976653,0.524000
+Bi,83,208.980399,1.000000
+Po,84,208.982431,0.000000
+Po,84,209.982874,0.000000
+At,85,209.987148,0.000000
+At,85,210.987497,0.000000
+Rn,86,210.990601,0.000000
+Rn,86,220.011394,0.000000
+Rn,86,222.017578,0.000000
+Fr,87,223.019736,0.000000
+Ra,88,223.018502,0.000000
+Ra,88,224.020212,0.000000
+Ra,88,226.025410,0.000000
+Ra,88,228.031071,0.000000
+Ac,89,227.027752,0.000000
+Th,90,230.033134,0.000000
+Th,90,232.038056,1.000000
+Pa,91,231.035884,1.000000
+U,92,233.039636,0.000000
+U,92,234.040952,0.000054
+U,92,235.043930,0.007204
+U,92,236.045568,0.000000
+U,92,238.050788,0.992742
+Np,93,236.046570,0.000000
+Np,93,237.048174,0.000000
+Pu,94,238.049560,0.000000
+Pu,94,239.052164,0.000000
+Pu,94,240.053814,0.000000
+Pu,94,241.056852,0.000000
+Pu,94,242.058743,0.000000
+Pu,94,244.064205,0.000000
+Am,95,241.056829,0.000000
+Am,95,243.061381,0.000000
+Cm,96,243.061389,0.000000
+Cm,96,244.062753,0.000000
+Cm,96,245.065492,0.000000
+Cm,96,246.067224,0.000000
+Cm,96,247.070354,0.000000
+Cm,96,248.072350,0.000000
+Bk,97,247.070307,0.000000
+Bk,97,249.074988,0.000000
+Cf,98,249.074854,0.000000
+Cf,98,250.076406,0.000000
+Cf,98,251.079589,0.000000
+Cf,98,252.081627,0.000000
+Es,99,252.082980,0.000000
+Fm,100,257.095106,0.000000
+Md,101,258.098432,0.000000
+Md,101,260.103650,0.000000
+No,102,259.101030,0.000000
+Lr,103,262.109610,0.000000
+Rf,104,267.121790,0.000000
+Db,105,268.125670,0.000000
+Sg,106,271.133930,0.000000
+Bh,107,272.138260,0.000000
+Hs,108,270.134290,0.000000
+Mt,109,276.151590,0.000000
+Ds,110,281.164510,0.000000
+Rg,111,280.165140,0.000000
+Cn,112,285.177120,0.000000
+Nh,113,284.178730,0.000000
+Fl,114,289.190420,0.000000
+Mc,115,288.192740,0.000000
+Lv,116,293.204490,0.000000
+Ts,117,292.207460,0.000000
+Og,118,294.213920,0.000000
diff --git a/build/lib/cctk/data/vdw_radii.csv b/build/lib/cctk/data/vdw_radii.csv
new file mode 100644
index 0000000..144f248
--- /dev/null
+++ b/build/lib/cctk/data/vdw_radii.csv
@@ -0,0 +1,76 @@
+1, 1.17
+2, 1.4
+3, 1.81
+4, 1.53
+5, 1.92
+6, 1.75
+7, 1.55
+8, 1.40
+9, 1.30
+10, 1.54
+11, 2.27
+12, 1.73
+13, 1.84
+14, 2.10
+15, 1.80
+16, 1.80
+17, 1.77
+18, 1.88
+19, 2.75
+20, 2.31
+21, 2.30
+22, 2.15
+23, 2.05
+24, 2.05
+25, 2.05
+26, 2.05
+27, 2.00
+28, 2.00
+29, 2.00
+30, 2.10
+31, 1.87
+32, 2.11
+33, 1.85
+34, 1.90
+35, 1.95
+36, 2.02
+37, 3.03
+38, 2.49
+39, 2.40
+40, 2.30
+41, 2.15
+42, 2.10
+43, 2.05
+44, 2.05
+45, 2.00
+46, 2.05
+47, 2.10
+48, 2.20
+49, 1.93
+50, 2.17
+51, 2.06
+52, 2.06
+53, 2.10
+54, 2.18
+55, 3.43
+56, 2.68
+57, 2.50
+72, 2.25
+73, 2.20
+74, 2.10
+75, 2.05
+76, 2.00
+77, 2.00
+78, 2.05
+79, 2.10
+80, 2.05
+81, 1.96
+82, 2.02
+83, 2.07
+84, 1.97
+85, 2.02
+86, 2.20
+87, 3.48
+88, 2.83
+89, 2.40
+90, 2.30
diff --git a/build/lib/cctk/ensemble.py b/build/lib/cctk/ensemble.py
new file mode 100644
index 0000000..e35cc08
--- /dev/null
+++ b/build/lib/cctk/ensemble.py
@@ -0,0 +1,663 @@
+import numpy as np
+from copy import deepcopy
+
+import cctk
+from cctk.helper_functions import align_matrices
+
+
+class Ensemble:
+    """
+    Class representing a collection of molecules. They do not all need to have the same atoms or bonds.
+
+    Ensembles are composed of molecules and properties. Molecules are ``Molecule`` objects, whereas properties are ``dict`` objects containing calculation-specific information.
+
+    There are various shortcuts for handling ``Ensemble`` objects:
+
+    - ``ensemble[molecule]`` or ``ensemble[0]`` will return new ``Ensemble`` objects with only the specified molecules.
+        Lists or slices can also be used: so ``ensemble[0:10:2]`` or ``ensemble[[molecule1, molecule2, molecule3]]`` will also return new ``Ensemble`` objects.
+    - Individual properties can be read through tuple indexing: ``ensemble[0,"energy"]`` will return the energy of the first molecule,
+        while ``ensemble[:,"energy"]`` will return a list of all the energies.
+    - To access ``Molecule`` objects, use ``ensemble.molecules``: ``ensemble.molecules[0]`` will return the first object, whereas ``ensemble.molecules[1:3]`` will return a list.
+    - ``ensemble.items()`` will return a list of (molecule, property) pairs.
+    - ``ensemble.molecule_list()`` and ``ensemble.properties_list()`` return lists of molecules and properties, respectively.
+
+    Attributes:
+        name (str): name, for identification
+        _items (dict): keys: ``Molecule`` objects; values: dictionaries containing properties from each molecule, variable. should always be one layer deep.
+        molecules (``MoleculeIndexer``): special object that accesses the keys
+    """
+
+    def __init__(self, name=None):
+        """
+        Create new instance.
+
+        Args:
+            name (str): name of Ensemble
+        """
+        self.name = name
+        self._items = {}
+        self.molecules = self._MoleculeIndexer(self)
+
+    def __str__(self):
+        name = "None" if self.name is None else self.name
+        return f"Ensemble (name={name}, {len(self._items)} molecules)"
+
+    def __getitem__(self, key):
+        if isinstance(key, (int, np.integer)):
+            mol = self.molecule_list()[key]
+            prop = self.properties_list()[key]
+            new = type(self)(name=self.name) # will return either Ensemble or subclass thereof
+            new.add_molecule(mol, properties=prop)
+            return new
+        elif isinstance(key, cctk.Molecule):
+            idx = self.molecule_list().index(key)
+            return self[idx]
+        elif isinstance(key, (list, np.ndarray)):
+            new_list = [self[k] for k in key]
+            return self.join_ensembles(new_list, name=self.name)
+        elif isinstance(key, slice):
+            start, stop, step = key.indices(len(self))
+            return self[list(range(start, stop, step))]
+        elif isinstance(key, tuple):
+            return self.get_property(key[0], key[1])
+        elif key is None:
+            return self
+        else:
+            raise KeyError(f"not a valid datatype for Ensemble key: {type(key)}")
+
+    def __setitem__(self, key, item):
+        assert isinstance(key, tuple), "need two indexes to set a value in an ensemble!"
+        idx = key[0]
+        name = key[1]
+
+        if isinstance(idx, slice):
+            start, stop, step = idx.indices(len(self))
+            self[list(range(start, stop, step)), name] = item
+        elif isinstance(idx, (list, np.ndarray)) and isinstance(item, (list, np.ndarray)):
+            assert len(idx) == len(item), f"can't set {len(item)} items into {len(key)} variables (cf. pigeonhole principle)"
+            for (k, i) in zip(idx, item):
+                self[k, name] = i
+        elif isinstance(idx, (list, np.ndarray)):
+            for k in idx:
+                self[k, name] = item
+        elif isinstance(idx, (int, np.integer)):
+            mol = self.molecule_list()[idx]
+            self[mol, name] = item
+        elif isinstance(idx, cctk.Molecule):
+            if isinstance(name, (list, np.ndarray)):
+                for n in name:
+                    self[idx,n] = item
+            #### we can't assign multiple items to a list of names since that would preclude assigning a list to a single variable
+            else:
+                self._items[idx][name] = item
+        else:
+            raise KeyError(f"not a valid datatype for Ensemble index: {type(idx)}")
+
+    def __len__(self):
+        return len(self._items)
+
+    def __iter__(self):
+        return iter(self.items())
+
+    def keys(self):
+        return self._items.keys()
+
+    def values(self):
+        return self._items.values()
+
+    def molecule_list(self):
+        """
+        Returns a list of the constituent molecules.
+        """
+        return list(self.keys())
+
+    def properties_list(self):
+        """
+        Returns a list of the property dictionaries, one per constituent molecule.
+        """
+        return list(self.values())
+
+    def has_property(self, idx, prop):
+        """
+        Returns ``True`` if ``prop`` is defined for any molecule in the ensemble and ``False`` otherwise (``idx`` is currently not consulted).
+        """
+        combined = self.combined_properties()
+        if prop in combined:
+            return True
+        else:
+            return False
+
+    def combined_properties(self):
+        """
+        Returns a dictionary containing the most up-to-date version of each property.
+        """
+        combined = dict()
+        for p in self.properties_list():
+            combined = {**combined, **p}
+        return combined
+
+    def get_property(self, idx, prop):
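+        """Returns the value(s) of ``prop`` for the molecule(s) selected by ``idx`` (a single value for one molecule, otherwise a list); undefined properties are reported as ``None``.
+        """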
+        ensemble = self[idx]
+        result = []
+        for m, p in ensemble.items():
+            if isinstance(prop, list):
+                row = []
+                for x in prop:
+                    if x in p:
+                        row.append(p[x])
+                    else:
+                        row.append(None)
+                result.append(row)
+            else:
+                if prop in p:
+                    result.append(p[prop])
+                else:
+                    result.append(None)
+        if len(ensemble) == 1:
+            if result[0] is None:
+                return None
+            return result[0]
+        else:
+            found_something = False
+            for x in result:
+                if x is not None:
+                    found_something = True
+                    break
+            if found_something:
+                return result
+            else:
+                return None
+
+    def get_properties_dict(self, idx):
+        """
+            Fetches the property dictionary belonging to a single molecule.
+
+            Args:
+                idx (int or cctk.Molecule): the molecule of interest, given either as a
+                                            0-indexed position or as the Molecule itself
+            Returns:
+                the property dict stored for that molecule
+        """
+        accepted_types = (int, np.integer, cctk.Molecule)
+        assert isinstance(idx, accepted_types), "index must be int or Molecule"
+        selection = self[idx]
+        assert len(selection) == 1, "idx returned too many ensembles"
+        return selection.properties_list()[0]
+
+    def items(self):
+        """
+        Returns the (molecule, properties) pairs of the ensemble, exactly as given by ``self._items.items()``.
+        """
+        return self._items.items()
+
+    # object to allow convenient indexing of the molecules in the ensemble  
+    #
+    # allowed use cases
+    #
+    # retrieving molecules:
+    # ensemble.molecules[0]: first molecule
+    # ensemble.molecules[-1]: last molecule
+    # ensemble.molecules[[0,1]]: first two molecules as a list
+    # ensemble.molecules[0:4:2]: first and third molecules as a list
+    #
+    # setting molecule properties this way is not allowed
+    class _MoleculeIndexer():
+        # read-only accessor: the molecules are the keys of ``ensemble._items``, in stored order
+        def __init__(self, ensemble):
+            self.ensemble = ensemble
+
+        def __getitem__(self, key):
+            molecules = list(self.ensemble._items)
+            count = len(molecules)
+            if isinstance(key, (int, np.integer)):
+                self._check_key(key, count)
+                return molecules[key]
+            if isinstance(key, slice):
+                return [molecules[i] for i in range(*key.indices(count))]
+            if not isinstance(key, (list, np.ndarray)):
+                raise ValueError(f"cannot index with type {str(type(key))}")
+            if isinstance(key, np.ndarray):
+                assert key.ndim == 1, f"multidimensional keys not allowed, shape was {np.shape(key)}"
+            # list or 1D array of integers: look each one up individually
+            picked = []
+            for k in key:
+                assert isinstance(k, (int, np.integer)), f"key {k} in {str(key)} is not an integer, type is {str(type(k))}"
+                self._check_key(k, count)
+                picked.append(molecules[k])
+            return picked
+
+        def __setitem__(self, key, item):
+            raise LookupError("cannot set molecule properties this way; use ensemble.set_property_dict(molecule, property_dict) instead")
+
+        def _check_key(self, key, n_items):
+            assert -n_items <= key < n_items, f"key {key} is out of range...must be between {-n_items} and {n_items-1} inclusive"
+
+        def __iter__(self):
+            return iter(self.ensemble.molecule_list())
+
+    def properties(self, num=None):
+        """
+        Returns the property dicts stored in the ensemble.
+        With ``num`` given, only the dict at that position is returned; otherwise a list of all of them.
+        """
+        if num is not None:
+            assert isinstance(num, int), "num must be integer"
+        every_dict = list(self.values())
+        return every_dict if num is None else every_dict[num]
+
+    def sort_by(self, property_name, ascending=True):
+        """
+        Orders the ensemble by the values of one property.
+        A ``ValueError`` is raised if the property is absent from the ensemble or missing for any entry.
+        The values are assumed to be mutually comparable; this is not verified.
+
+        Args:
+            property_name (str): the name of the property to sort on (must be a string or number)
+            ascending (bool): ``True`` for increasing values, ``False`` for decreasing values
+        Returns:
+            new Ensemble (current ensemble is not modified)
+        """
+        values = self[:,property_name]
+        if values is None:
+            raise ValueError(f"property '{property_name}' not found in ensemble")
+        values = np.asarray(values)
+        # counts the None entries (presumably molecules that lack the property)
+        n_missing_entries = np.count_nonzero(values==None)
+        if n_missing_entries > 0:
+            error = "---sorting error---\n" + str(values)
+            raise ValueError(f"{error}\nproperty '{property_name}' has {n_missing_entries} missing entries and cannot be sorted")
+        order = np.argsort(values)
+        if not ascending:
+            order = np.flip(order)
+        return self[[order]]
+
+    def add_molecule(self, molecule, properties=None, copy=False):
+        """
+        Adds a molecule to the ensemble.
+
+        Args:
+            molecule (Molecule): the molecule to be added
+            properties (dict): property name (str) to property value; a new empty dict if omitted
+            copy (bool): whether to store an independent copy of the molecule
+
+        Raises:
+            TypeError: if ``molecule`` is not a ``cctk.Molecule``
+        """
+        if not isinstance(molecule, cctk.Molecule):
+            raise TypeError("molecule is not a Molecule - so it can't be added!")
+
+        if copy:
+            molecule = deepcopy(molecule)
+
+        if properties is None:
+            #### a ``{}`` literal builds a new dict on every call, so molecules never share a default dict
+            properties = {}
+        assert isinstance(properties, dict), f"properties must be a dict and not type {type(properties)}"
+        self._items[molecule] = properties
+
+    def _check_molecule_number(self, number):
+        """
+        Helper method which checks that ``number`` converts to an int and is a valid (possibly negative) molecule index.
+        """
+        try:
+            number = int(number)
+        except (TypeError, ValueError):
+            raise TypeError(f"molecule number {number} must be integer")
+
+        if not -len(self._items) <= number < len(self._items):
+            raise ValueError(f"molecule number {number} out of range!")
+
+    @classmethod
+    def join_ensembles(cls, ensembles, name=None):
+        """
+        Builds a new Ensemble out of the molecules and property dicts of existing ensembles.
+
+        Entries are merged in order via ``_items.update``; the property dicts are
+        handed over as-is (no copies are made).
+
+        Args:
+            name (str): name of Ensemble created
+            ensembles (list of Ensembles): Ensemble objects to join
+        """
+        joined = Ensemble(name=name)
+        for member in ensembles:
+            assert isinstance(member, Ensemble), "can't join an object that isn't an Ensemble!"
+
+        for member in ensembles:
+            joined._items.update(member.items())
+        return joined
+
+    def lowest_molecules(self, property_name, num=1):
+        """
+        Picks out the molecule(s) that come first when the ensemble is sorted by a property.
+        Sorting is delegated to ``self.sort_by(property_name)``.
+
+        Args:
+            property_name (str): the name of the property to sort on
+            num (int): how many molecules to return
+        Returns:
+            lowest ``Molecule`` (if num==1)
+            ``list`` of ``Molecule`` (otherwise)
+        """
+        assert isinstance(num, (int, np.integer)), f"num must be an integer, got {type(num)}"
+        assert num > 0, f"num must be > 0, got {num}"
+        ranked = self.sort_by(property_name).molecules
+        # a single molecule is returned bare; several come back as a list
+        return ranked[0] if num == 1 else ranked[0:num]
+
+class ConformationalEnsemble(Ensemble):
+    """
+    Class representing a group of conformers. All members must have the same atom types in the same order.
+    """
+
+    def __str__(self):
+        """
+        One-line summary: optional name, number of molecules, and atom count of the first molecule.
+        """
+        # an empty ensemble has no molecule to ask, so it reports 0 atoms
+        n_atoms = self.molecule_list()[0].num_atoms() if len(self._items) > 0 else 0
+        if self.name is None:
+            return f"ConformationalEnsemble ({len(self._items)} molecules, {n_atoms} atoms)"
+        return f"ConformationalEnsemble (name={self.name}, {len(self._items)} molecules, {n_atoms} atoms)"
+
+    def add_molecule(self, molecule, properties=None, copy=False, checks=True):
+        """
+        Adds a molecule after comparing it to the first molecule of the ensemble: atom count, charge, and multiplicity must match (``ValueError`` otherwise); atomic numbers are compared only if ``checks`` is true.
+        """
+        if len(self._items) > 0:
+            initial_mol = self.molecule_list()[0]
+            if molecule.num_atoms() != initial_mol.num_atoms():
+                raise ValueError("wrong number of atoms for this ensemble")
+
+            if molecule.charge != initial_mol.charge:
+                raise ValueError("wrong charge for this ensemble")
+
+            if molecule.multiplicity != initial_mol.multiplicity:
+                raise ValueError("wrong spin multiplicity for this ensemble")
+
+            if checks and not np.array_equal(molecule.atomic_numbers, initial_mol.atomic_numbers):
+                raise ValueError("wrong atom types for this ensemble")
+
+            #### the incoming molecule is re-pointed at the first molecule's bonds and atomic_numbers, so only one copy is kept
+            molecule.bonds = initial_mol.bonds
+            molecule.atomic_numbers = initial_mol.atomic_numbers
+
+        super().add_molecule(molecule, properties, copy)
+
+    @classmethod
+    def join_ensembles(cls, ensembles, name=None, copy=False):
+        """
+        Creates a new ConformationalEnsemble object from existing ensembles.
+        Molecules are copied only if ``copy`` is true; property dicts are passed through as-is.
+
+        Args:
+            name (str): name of ConformationalEnsemble created
+            ensembles (list of ConformationalEnsembles): ConformationalEnsemble objects to join
+            copy (bool): whether to make copies of the component molecules
+        """
+        new_ensemble = ConformationalEnsemble(name=name)
+        for ensemble in ensembles:
+            assert isinstance(ensemble, ConformationalEnsemble), "can't join an object that isn't a ConformationalEnsemble!"
+
+        for ensemble in ensembles:
+            for mol, prop in ensemble.items():
+                new_ensemble.add_molecule(mol, prop, copy)
+
+        return new_ensemble
+
+    def align(self, to_geometry=0, comparison_atoms="heavy", compute_RMSD=False):
+        """
+        Aligns every geometry in this ensemble to the specified geometry,
+        optionally computing the root-mean-square distance between each
+        geometry and the reference geometry.
+
+        Alignments are computed from the atoms listed in ``comparison_atoms``.
+        The current ensemble will not be altered.  RMSDs will be calculated over the
+        comparison atoms only.
+
+        Args:
+            to_geometry (int): the reference geometry to align to (0-indexed)
+            comparison_atoms (str or list): which atoms to use when computing alignments
+                                            "heavy" for all non-hydrogen atoms,
+                                            "all" for all atoms, or
+                                            a list of 1-indexed atom numbers
+            compute_RMSD (bool): whether to also return the RMSDs to the reference before and after alignment
+
+        Returns:
+            new aligned ``ConformationalEnsemble`` or
+            new aligned ``ConformationalEnsemble``, before_RMSD array, after_RMSD array
+        """
+        # check inputs (any string other than "all"/"heavy" is rejected by the type assertion below)
+        self._check_molecule_number(to_geometry)
+        n_atoms = self.molecules[0].num_atoms()
+
+        if isinstance(comparison_atoms, str):
+            if comparison_atoms == "all":
+                comparison_atoms = np.arange(1, n_atoms + 1)
+            elif comparison_atoms == "heavy":
+                comparison_atoms = self.molecules[0].get_heavy_atoms()
+        assert isinstance(comparison_atoms, (list, np.ndarray, cctk.OneIndexedArray)), f"unexpected type for comparison_atoms: {str(type(comparison_atoms))}"
+        for a in comparison_atoms:
+            assert 1 <= a <= n_atoms, f"atom number out of range: got {a}, but must be between 1 and {n_atoms}"
+
+        assert len(comparison_atoms) >= 3, f"need at least 3 atoms for alignment, but only got {len(comparison_atoms)}"
+
+        # duplicate the ensemble so that the caller's molecules are left untouched
+        new_ensemble = deepcopy(self)
+
+        # translate all molecules to the origin
+        # with respect to the comparison atoms
+        for molecule, _ in new_ensemble:
+            full_geometry = molecule.geometry
+            partial_geometry = full_geometry[comparison_atoms]
+            translation_vector = -partial_geometry.mean(axis=0)
+            molecule.translate_molecule(translation_vector)
+
+        full_template_geometry = new_ensemble.molecules[to_geometry].geometry
+        partial_template_geometry = full_template_geometry[comparison_atoms]
+        before_RMSDs = []
+        after_RMSDs = []
+
+        # perform alignment using Kabsch algorithm
+        for i, (molecule, _) in enumerate(new_ensemble):
+            full_geometry = molecule.geometry
+            partial_geometry = full_geometry[comparison_atoms]
+            if compute_RMSD:
+                before_RMSD = cctk.helper_functions.compute_RMSD(partial_template_geometry, partial_geometry)
+                before_RMSDs.append(before_RMSD)
+            new_geometry = align_matrices(partial_geometry, full_geometry, partial_template_geometry)
+            molecule.geometry = new_geometry
+            if compute_RMSD:
+                partial_geometry = new_geometry[comparison_atoms]
+                after_RMSD = cctk.helper_functions.compute_RMSD(partial_template_geometry, partial_geometry)
+                after_RMSDs.append(after_RMSD)
+            assert len(molecule.geometry) == n_atoms, f"wrong number of geometry elements! expected {n_atoms}, got {len(molecule.geometry)}"
+
+        if compute_RMSD:
+            return new_ensemble, before_RMSDs, after_RMSDs
+        return new_ensemble
+
+    def eliminate_redundant(self, RMSD_cutoff=0.5, comparison_atoms="heavy", return_RMSD=False):
+        """
+        Aligns every geometry in this ensemble (to geometry 0) and then creates a new ensemble that contains only the non-redundant conformers.
+        If energies are available, the lowest energy conformer will be kept for every redundancy.
+        The current ensemble will not be modified.  The resulting ensemble will be sorted by energy (if available).
+
+        Args:
+            RMSD_cutoff (float): remove conformers that are more similar than this threshold
+                                 to any conformer already kept (must be greater than 0.0001)
+            comparison_atoms (str or list): which atoms to use when computing alignments
+                                            "heavy" for all non-hydrogen atoms,
+                                            "all" for all atoms, or
+                                            a list of 1-indexed atom numbers
+            return_RMSD (bool): whether or not to return list of RMSD values
+
+        Returns:
+            new ``ConformationalEnsemble`` (plus a list with one RMSD per kept conformer, if ``return_RMSD`` is true)
+        """
+        # check inputs
+        n_atoms = self.molecules[0].num_atoms()
+        if isinstance(comparison_atoms, str):
+            if comparison_atoms == "all":
+                comparison_atoms = np.arange(1, n_atoms + 1)
+            elif comparison_atoms == "heavy":
+                comparison_atoms = self.molecules[0].get_heavy_atoms()
+
+        assert isinstance(comparison_atoms, (list, np.ndarray, cctk.OneIndexedArray)), f"unexpected type for comparison_atoms: {str(type(comparison_atoms))}"
+        for a in comparison_atoms:
+            assert 1 <= a <= n_atoms, f"atom number out of range: got {a}, but must be between 1 and {n_atoms}"
+        assert len(comparison_atoms) >= 3, f"need at least 3 atoms for alignment, but only got {len(comparison_atoms)}"
+
+        assert isinstance(RMSD_cutoff, (float, int)), f"RMSD cutoff must be a float but got {str(type(RMSD_cutoff))}"
+        assert RMSD_cutoff > 0.0001, "must use a big enough RMSD cutoff"
+
+        # align all molecules
+        old_ensemble = self.align(to_geometry=0, comparison_atoms=comparison_atoms, compute_RMSD=False)
+
+        # sort molecules by energy, but only if every molecule has an "energy" property
+        energies_available = True
+        for molecule,properties in old_ensemble.items():
+            if "energy" not in properties:
+                energies_available = False
+                break
+
+        n_molecules = len(old_ensemble)
+        sorted_indices = list(range(n_molecules))
+        if energies_available:
+            energies = old_ensemble[:,"energy"]
+            sorted_indices = list(np.argsort(energies))
+
+        # boolean indexing noticeably faster (the 1-indexed atom numbers are shifted to 0-indexed mask positions)
+        idxs = np.array(comparison_atoms)
+        mask = np.zeros(old_ensemble.molecules[0].geometry.shape[0], dtype=bool)
+        mask[idxs - 1] = True
+
+        partial_geoms = [m.geometry[mask] for m in old_ensemble.molecules]
+        new_partial_geoms = []
+
+        rmsds = list()
+
+        # add molecules one by one, skipping any that is within RMSD_cutoff of a conformer already kept
+        new_ensemble = ConformationalEnsemble()
+        for i in sorted_indices:
+            ok_to_add = True
+
+            candidate_rmsd = 0
+            for existing_molecule in new_partial_geoms:
+                candidate_rmsd = cctk.helper_functions.compute_RMSD(partial_geoms[i], existing_molecule, checks=False)
+                if candidate_rmsd < RMSD_cutoff:
+                    ok_to_add = False
+                    break
+
+            if ok_to_add:
+                candidate_molecule = old_ensemble.molecules[i]
+                candidate_molecule_properties = old_ensemble.get_properties_dict(candidate_molecule)
+
+                new_ensemble.add_molecule(candidate_molecule, candidate_molecule_properties)
+                new_partial_geoms.append(candidate_molecule.geometry[mask])
+                rmsds.append(candidate_rmsd)
+
+        if return_RMSD:
+            return new_ensemble, rmsds
+        else:
+            return new_ensemble
+
+    def get_geometric_parameters(self, parameter, atom1, atom2, atom3=None, atom4=None):
+        """
+        Measures one geometric parameter (bond distance, angle, or dihedral angle) on every molecule of the ensemble.
+
+        Args:
+            parameter (str): one of ``angle``, ``distance``, or ``dihedral``
+            atom1 (int): number of the atom in question
+            atom2 (int): same, but for the second atom
+            atom3 (int): same, but for the third atom (only required for parameter ``angle`` or ``dihedral``)
+            atom4 (int): same, but for the fourth atom (only required for parameter ``dihedral``)
+        Returns:
+            a list of the specified parameter's values for each geometry
+        """
+        def measure(molecule):
+            if parameter == "distance":
+                return molecule.get_distance(atom1, atom2)
+            if parameter == "angle":
+                if atom3 is None:
+                    raise ValueError("need atom3 to calculate angle!")
+                return molecule.get_angle(atom1, atom2, atom3)
+            if parameter == "dihedral":
+                if (atom3 is None) or (atom4 is None):
+                    raise ValueError("need atom3 and atom4 to calculate dihedral!")
+                return molecule.get_dihedral(atom1, atom2, atom3, atom4)
+            raise ValueError(f"Invalid parameter {parameter}!")
+
+        output = [None] * len(self)
+        for index, molecule in enumerate(self.molecule_list()):
+            output[index] = measure(molecule)
+        return output
+
+    def assign_connectivity(self, index=0):
+        """
+        Computes connectivity once, for the molecule at position ``index``, and reuses it for every molecule.
+        Much faster than assigning connectivity for each individually -- but assumes all bonding is the same.
+        Returns the ensemble itself.
+        """
+        assert isinstance(index, int), "Need integer index"
+        shared_bonds = self.molecules[index].assign_connectivity().bonds
+        for member in self.molecules:
+            member.bonds = shared_bonds
+        return self
+
+    def boltzmann_average(self, which, energies=None, temp=298, energy_unit="hartree", return_weights=False):
+        """
+        Computes the Boltzmann-weighted average of a property over the whole ensemble.
+
+        Args:
+            which (str): which property to compute
+            energies (str, list, or np.ndarray): energies to use for weighting.
+                Will default to ``self[:,"energy"]``; a string is shorthand for ``self[:,energies]``.
+            temp (float): temperature for Boltzmann-weighting, in K
+            energy_unit (str): ``kcal_mol`` converts the energies to hartree; any other value leaves them unchanged
+            return_weights (bool): whether to return a list of weights too
+
+        Returns:
+            weighted property, of the same shape as the individual property
+            (followed by the array of normalized weights if ``return_weights`` is true)
+        """
+        if energies is None:
+            energies = self[:,"energy"]
+        elif isinstance(energies, str):
+            energies = self[:,energies]
+        elif not isinstance(energies, (list, np.ndarray, cctk.OneIndexedArray)):
+            raise ValueError(f"invalid energy value {energies} (type {type(energies)})")
+
+        for i, (m, pd) in enumerate(self.items()):
+            assert which in pd, f"molecule #{i} doesn't have property {which} defined!"
+
+        values = np.array(self[:,which], dtype=np.float64)
+        energies = np.array(energies, dtype=np.float64)
+
+        assert len(energies) == len(self)
+        assert len(values) == len(self)
+        # the float64 conversion above turns None into NaN, so missing entries must be detected as NaN
+        assert not np.any(np.isnan(energies)), "energy not defined for all molecules"
+        assert not np.any(np.isnan(values)), f"property {which} not defined for all molecules"
+
+        # perhaps at some point we will need a real unit system like simtk/OpenMM, but not today!
+        if energy_unit == "kcal_mol":
+            energies = energies / 627.509
+        energies = energies - np.min(energies)
+
+        R = 3.1668105e-6 # eH/K
+
+        weights = np.exp(-1*energies/(R*temp))
+        weights = weights / np.sum(weights)
+
+        # axis=0 averages over molecules, so array-valued properties keep their per-molecule shape
+        try:
+            weighted_value = np.average(values, weights=weights, axis=0)
+        except Exception as e:
+            raise ValueError(f"error computing Boltzmann average: {e}")
+
+        if return_weights:
+            return weighted_value, weights
+        return weighted_value
diff --git a/build/lib/cctk/file.py b/build/lib/cctk/file.py
new file mode 100644
index 0000000..ac12e55
--- /dev/null
+++ b/build/lib/cctk/file.py
@@ -0,0 +1,81 @@
+import os
+from abc import ABC, abstractmethod
+
+
+class File(ABC):
+    """
+    Abstract class representing text files; the static helpers below do the actual reading and writing.
+    """
+
+    @abstractmethod
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def write_file(filename, text, overwrite_existing=True):
+        """
+        Writes output text to a file, replacing any previous contents (``TypeError`` if ``text`` is not a string).
+
+        Args:
+            filename (str): path to file, including name (e.g. ``path/to/input.gjf``)
+            text (str): desired contents of file
+            overwrite_existing (Bool): if false, a ``ValueError`` is raised when ``filename`` already exists
+
+        Returns:
+            ``True`` if write succeeded, ``False`` if an ``OSError`` occurred (the error is printed)
+        """
+        if not isinstance(text, str):
+            raise TypeError("cannot write non-string to file!")
+
+        if not overwrite_existing and os.path.exists(filename):
+            raise ValueError(f"{filename} already exists but not allowed to overwrite")
+        else:
+            try:
+                with open(filename, "w+") as output_file:
+                    output_file.write(text)
+                return True
+            except OSError as e:
+                print(e)
+                return False
+
+    @staticmethod
+    def append_to_file(filename, text):
+        """
+        Appends output text to an existing file (``ValueError`` if it does not exist, ``TypeError`` if ``text`` is not a string).
+
+        Args:
+            filename (str): path to file, including name (e.g. ``path/to/input.gjf``)
+            text (str): desired contents of file
+
+        Returns:
+            ``True`` if write succeeded, ``False`` if an ``OSError`` occurred (the error is printed)
+        """
+        if not isinstance(text, str):
+            raise TypeError("cannot write non-string to file!")
+
+        if os.path.exists(filename):
+            try:
+                with open(filename, "a+") as output_file:
+                    output_file.write(text)
+                return True
+            except OSError as e:
+                print(e)
+                return False
+        else:
+            raise ValueError(f"{filename} does not exist")
+
+    @staticmethod
+    def read_file(filename, lazy=False):
+        """
+        Reads a file and parses into lines.
+
+        Args:
+            filename (str): The path to the file.
+            lazy (bool): accepted but not used by this implementation
+        Returns:
+            A list containing all the lines in the file, without their line endings.
+        """
+        with open(filename, "r") as filehandle:
+            lines = filehandle.read().splitlines()
+            return lines
+
diff --git a/build/lib/cctk/gaussian_file.py b/build/lib/cctk/gaussian_file.py
new file mode 100644
index 0000000..68fb938
--- /dev/null
+++ b/build/lib/cctk/gaussian_file.py
@@ -0,0 +1,757 @@
+import re, warnings
+import numpy as np
+
+from enum import Enum
+
+from cctk import File, Molecule, ConformationalEnsemble, OneIndexedArray
+from cctk.helper_functions import get_symbol, get_number, get_corrected_free_energy
+import cctk
+
+import cctk.parse_gaussian as parse
+
+
+class GaussianJobType(Enum):
+    """
+    Class representing allowed Gaussian job types. Not an exhaustive list, but should be fairly comprehensive.
+
+    The value of each member is the lowercase Gaussian keyword, to permit automatic assignment.
+
+    All jobs have type ``SP`` by default.
+    """
+
+    SP = "sp"
+    """
+    Single point energy calculation.
+    """
+
+    OPT = "opt"
+    """
+    Geometry optimization.
+    """
+
+    FREQ = "freq"
+    """
+    Hessian calculation.
+    """
+
+    IRC = "irc"
+    """
+    Intrinsic reaction coordinate calculation.
+    """
+
+    NMR = "nmr"
+    """
+    NMR shielding prediction.
+    """
+
+    POP = "pop"
+    """
+    Population analysis.
+    """
+
+    FORCE = "force"
+    """
+    Gradient calculation.
+    """
+
+#### Maps each GaussianJobType value to the properties expected from that job type. NOTE(review): there is no "irc" key -- confirm that is intended.
+EXPECTED_PROPERTIES = {
+    "sp": ["energy", "scf_iterations",],
+    "opt": ["rms_displacement", "rms_force",],
+    "freq": ["gibbs_free_energy", "enthalpy", "frequencies",],
+    "nmr": ["isotropic_shielding",],
+    "pop": [],
+    "force": ["forces",],
+}
+
+
+class GaussianFile(File):
+    """
+    Class representing Gaussian input/output files.
+
+    Attributes:
+        ensemble (ConformationalEnsemble): ``ConformationalEnsemble`` instance
+        job_types (list): list of ``GaussianJobType`` instances
+        route_card (str): optional, route card of .gjf file
+        link0 (dict): optional, dictionary of Link 0 commands (e.g. {"mem": "32GB", "nprocshared": 16})
+        footer (str): optional, footer of .gjf file
+        successful_terminations (int): number of successful terminations (should be 1 for an opt, 2 for opt and then freq, 1 for a single point energy, etc)
+        elapsed_time (float): total time for job in seconds
+        title (str): optional, title of .gjf file
+    """
+
+    def __init__(
+        self, job_types=None, route_card=None, link0=None, footer=None, title="title", success=0, elapsed_time=0.0, molecule=None,
+    ):
+        """
+        Create new GaussianFile object.
+
+        Args:
+            job_types (list): list of ``job_type`` instances
+            route_card (str): optional, route card of ``.gjf`` file
+            link0 (dict): optional, Link 0 commands of ``.gjf`` file
+            footer (str): optional, footer of ``.gjf`` file
+            title (str): optional, title of ``.gjf`` file
+            success (int): num successful terminations
+            elapsed_time (float): total time for job in seconds
+            molecule (cctk.Molecule): molecule to initiate, if desired
+
+        Raises:
+            TypeError: if a supplied argument has the wrong type, or ``elapsed_time`` is negative
+            ValueError: if ``job_types`` is a string
+        """
+        #### optional arguments are only type-checked when they are truthy
+        optional_arguments = (
+            (route_card, str, "route card needs to be a string"),
+            (link0, dict, "link0 needs to be a dict"),
+            (footer, str, "footer needs to be a string"),
+            (title, str, "title needs to be a string"),
+            (success, int, "success needs to be an integer"),
+        )
+        for value, expected_type, message in optional_arguments:
+            if value and not isinstance(value, expected_type):
+                raise TypeError(message)
+
+        if not isinstance(elapsed_time, (float, int)) or elapsed_time < 0.0:
+            raise TypeError(f"elapsed_time invalid: {elapsed_time}")
+
+        if job_types is not None:
+            if isinstance(job_types, str):
+                raise ValueError(f"invalid job_types {job_types} - did you mean to call GaussianFile.read_file({job_types})?")
+            if not all(isinstance(job, GaussianJobType) for job in job_types):
+                raise TypeError(f"invalid job_types {job_types}")
+
+        self.ensemble = ConformationalEnsemble()
+        if molecule is not None:
+            assert isinstance(molecule, Molecule), "molecule is not a valid cctk.Molecule!"
+            self.ensemble.add_molecule(molecule)
+
+        #### everything else is stored as given
+        self.job_types = job_types
+        self.route_card = route_card
+        self.link0 = link0
+        self.footer = footer
+        self.title = title
+        self.successful_terminations = success
+        self.elapsed_time = elapsed_time
+
+    def __str__(self):  # one-line summary: the title and the number of entries in the ensemble
+        return f"GaussianFile (title=\"{str(self.title)}\", {len(self.ensemble)} entries in Ensemble)"
+
+    @classmethod
+    def write_molecule_to_file(cls, filename, molecule, route_card, link0={"mem": "32GB", "nprocshared": 16}, footer=None, title="title", append=False, print_symbol=False, point_charges=None):
+        """
+        Write a ``.gjf`` file using the given molecule.
+
+        Args:
+            filename (str): path to the new file
+            molecule (Molecule): which molecule to use -- a ``Molecule`` object.
+            route_card (str): route card for new file
+            link0 (dict): dictionary of Link 0 commands
+            footer (str): footer for new file
+            title (str): title of the file, defaults to "title"
+            append (Bool): whether or not to append to file using Link1 specifications
+            print_symbol (Bool): whether to print atomic symbols (instead of atomic numbers)
+            point_charges (list of cctk.PointCharge): written after the footer as ``x y z charge`` lines; needs "charge" in the route card
+        """
+        if not isinstance(molecule, Molecule):
+            raise TypeError("need a valid molecule to write a file!")
+
+        if not isinstance(route_card, str):
+            raise ValueError("can't write a file without a route card")
+
+        if not re.match(r"^#p", route_card):
+            warnings.warn(f"route card doesn't start with #p: {route_card}")
+
+        if point_charges is not None:
+            assert isinstance(point_charges, list), "point_charges must be list"
+            assert all([isinstance(pc, cctk.PointCharge) for pc in point_charges]), "point_charges must be list of point charges"
+            assert re.search(r"charge", route_card, flags=re.IGNORECASE), "charge must be in route_card if point_charges are present"
+
+        #### collect the pieces of the file in order; they are joined once at the end
+        pieces = []
+        if append:
+            pieces.append("--Link1--\n")
+        if isinstance(link0, dict):
+            pieces.extend(f"%{key}={val}\n" for key, val in link0.items())
+
+        pieces.append(f"{route_card.strip()}\n\n{title}\n\n")
+        pieces.append(f"{int(molecule.charge)} {int(molecule.multiplicity)}\n")
+
+        for index, Z in enumerate(molecule.atomic_numbers, start=1):
+            line = molecule.get_vector(index)
+            if print_symbol:
+                Z = get_symbol(Z)
+                pieces.append(f"{Z:>2}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n")
+            else:
+                pieces.append(f"{Z:2d}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n")
+        pieces.append("\n")
+
+        if footer is not None:
+            pieces.append(f"{footer.strip()}\n\n")
+
+        if point_charges is not None:
+            for point_charge in point_charges:
+                pieces.append(f"{point_charge.coordinates[0]:>13.8f} {point_charge.coordinates[1]:>13.8f} {point_charge.coordinates[2]:>13.8f} {point_charge.charge:.5f}\n")
+            pieces.append("\n")
+
+        #### write the file
+        text = "".join(pieces)
+        if append:
+            super().append_to_file(filename, text)
+        else:
+            super().write_file(filename, text)
+
+    def write_file(self, filename, molecule=None, route_card=None, link0=None, footer=None, **kwargs):
+        """
+        Write a ``.gjf`` file, falling back to this object's own attributes for anything not supplied.
+
+        Args:
+            filename (str): path to the new file
+            molecule (int or Molecule): which molecule to write. Anything that is not a ``Molecule`` is
+                passed to ``self.get_molecule()`` (``None`` selects the last molecule);
+                a ``Molecule`` object is written to the file as-is.
+            route_card (str): route card for new file; ``self.route_card`` is used if ``None``
+            link0 (dict): dictionary of Link 0 commands (e.g. {"mem": "32GB", "nprocshared": 16}); ``self.link0`` is used if ``None``
+            footer (str): footer for new file; ``self.footer`` is used if ``None``
+            **kwargs: forwarded unchanged to ``self.write_molecule_to_file()``
+        """
+        # anything that is not already a Molecule is treated as a selector for the ensemble
+        target = molecule if isinstance(molecule, Molecule) else self.get_molecule(molecule)
+
+        # explicit arguments win; ``None`` means "use the value stored on this object"
+        self.write_molecule_to_file(
+            filename,
+            target,
+            self.route_card if route_card is None else route_card,
+            self.link0 if link0 is None else link0,
+            self.footer if footer is None else footer,
+            **kwargs,
+        )
+
+    def num_imaginaries(self):
+        """Returns how many imaginary frequencies ``self.imaginaries()`` reports."""
+        found = self.imaginaries()
+        count = len(found)
+        return count
+
+    def imaginaries(self):
+        """
+        Returns the negative entries of ``self.ensemble[-1:,"frequencies"]`` as ints (``int()`` truncates toward zero); empty list if unavailable.
+        """
+        if GaussianJobType.FREQ not in self.job_types:
+            return list()
+        if self.ensemble[-1:,"frequencies"] is None:
+            return list()
+        freqs = self.ensemble[-1:,"frequencies"]
+        if isinstance(freqs, list) and len(freqs) != 0:
+            return [int(freq) for freq in np.array(freqs)[np.array(freqs) < 0]]
+        return list()
+
+    @classmethod
+#    @profile
+    def read_file(cls, filename, return_lines=False, extended_opt_info=False):
+        """
+        Reads a Gaussian``.out`` or ``.gjf`` file and populates the attributes accordingly.
+        Only footers from ``opt=modredundant`` can be read automatically --  ``genecep`` custom basis sets, &c must be specified manually.
+
+        Files ending in ``gjf``/``com`` are delegated to ``_read_gjf_file()``; everything else is split on Link1 boundaries
+        and each link is parsed into its own ``GaussianFile``.
+
+        Note:
+            Will throw ``ValueError`` if there have been no successful iterations, or if no ``#p`` route card can be found.
+
+            NOTE(review): another ``read_file`` is defined further down in this file; if both live in the same class,
+            the later definition replaces this one -- confirm which implementation callers actually reach.
+
+        Args:
+            filename (str): path to the out file
+            return_lines (Bool): whether the lines of the file should be returned
+            extended_opt_info (Bool): if full parameters about each opt step should be collected
+                (by default, only ``rms_displacement`` and ``rms_force`` are collected)
+        Returns:
+            ``GaussianFile`` object (or list of ``GaussianFile`` objects for Link1 files)
+            (optional) the lines of the file (or list of lines of file for Link1 files)
+        """
+        if re.search("gjf$", filename) or re.search("com$", filename):
+            return cls._read_gjf_file(filename, return_lines)
+
+        link1_lines = parse.split_link1(filename)
+        files = []
+
+        for link1idx, lines in enumerate(link1_lines):
+            #### automatically assign job types based on header
+            header = lines.search_for_block("#p", "----", format_line=lambda x: x.lstrip(), join="")
+            if header is None:
+                raise ValueError("can't find route card! (perhaps '#p' wasn't employed?)")
+            job_types = cls._assign_job_types(header)
+
+            link0 = parse.extract_link0(lines)
+
+            title = ""
+            title_block = lines.search_for_block("l101.exe", "Symbolic Z-matrix", join="\n")
+            if title_block is not None:
+                for line in title_block.split("\n")[1:]:
+                    if not re.search("-----", line):
+                        title += line
+
+
+            # success/elapsed time are taken from the dedicated parser on the next line, so the values returned here are discarded
+            (geometries, atom_list, energies, scf_iterations, _, _) = parse.read_geometries_and_energies(lines)
+            success, elapsed_time = parse.extract_success_and_time(lines)
+
+            #### convert to right datatype
+            try:
+                atomic_numbers = np.array(atom_list, dtype=np.int8)
+            except Exception:
+                # entries could not be cast to int8 directly: translate each one with ``get_number`` (presumably element symbols)
+                atomic_numbers = np.array(list(map(get_number, atom_list)), dtype=np.int8)
+
+            # the footer is recovered from the block that starts with "The following ModRedundant input section"
+            footer = None
+            if re.search("modredundant", str(header)):
+                footer = lines.search_for_block("^ The following ModRedundant input section", "^ $", count=1, join="\n")
+                if footer is not None:
+                    footer = "\n".join(list(footer.split("\n"))[1:])  # get rid of the first line
+                    footer = "\n".join([" ".join(list(filter(None, line.split(" ")))) for line in footer.split("\n")])
+
+            bonds = parse.read_bonds(lines)
+            charge, multip =  lines.find_parameter("Multiplicity", expected_length=4, which_field=[1,3], split_on="=")[0]
+
+            f = GaussianFile(job_types=job_types, route_card=header, link0=link0, footer=footer, success=success, elapsed_time=elapsed_time, title=title)
+
+            # one Molecule and one property dict per geometry found in this link
+            molecules = [None] * len(geometries)
+            properties = [{} for _ in range(len(geometries))]
+            for idx, geom in enumerate(geometries):
+                molecules[idx] = Molecule(atomic_numbers, geom, charge=charge, multiplicity=multip, bonds=bonds)
+                if idx < len(energies):
+                    properties[idx]["energy"] = energies[idx]
+                if idx < len(scf_iterations):
+                    properties[idx]["scf_iterations"] = scf_iterations[idx]
+                properties[idx]["link1_idx"] = link1idx
+                properties[idx]["filename"] = filename
+                properties[idx]["iteration"] = idx
+
+            #### now for some job-type specific attributes
+            if GaussianJobType.OPT in job_types:
+                rms_forces = lines.find_parameter(r"RMS\s+Force", expected_length=5, which_field=2)
+                rms_displacements = lines.find_parameter(r"RMS\s+Displacement", expected_length=5, which_field=2)
+
+                if extended_opt_info:
+                    max_forces = lines.find_parameter("Maximum Force", expected_length=5, which_field=2)
+                    max_displacements = lines.find_parameter("Maximum Displacement", expected_length=5, which_field=2)
+                    max_gradients = lines.find_parameter("Cartesian Forces:", expected_length=6, which_field=3)
+                    rms_gradients = lines.find_parameter("Cartesian Forces:", expected_length=6, which_field=5)
+                    max_int_forces = lines.find_parameter("Internal  Forces:", expected_length=6, which_field=3)
+                    rms_int_forces = lines.find_parameter("Internal  Forces:", expected_length=6, which_field=5)
+                    delta_energy = lines.find_parameter("Predicted change in Energy", expected_length=4, which_field=3, cast_to_float=False)
+
+                for idx, force in enumerate(rms_forces):
+                    properties[idx]["rms_force"] = force
+                    properties[idx]["rms_displacement"] = rms_displacements[idx]
+
+                    # each extended list is bounds-checked because it may not have an entry for every step
+                    if extended_opt_info:
+                        if idx < len(max_forces):
+                            properties[idx]["max_force"] = max_forces[idx]
+
+                        if idx < len(max_displacements):
+                            properties[idx]["max_displacement"] = max_displacements[idx]
+
+                        if idx < len(max_gradients):
+                            properties[idx]["max_gradient"] = max_gradients[idx]
+
+                        if idx < len(rms_gradients):
+                            properties[idx]["rms_gradient"] = rms_gradients[idx]
+
+                        if idx < len(max_int_forces):
+                            properties[idx]["max_internal_force"] = max_int_forces[idx]
+
+                        if idx < len(rms_int_forces):
+                            properties[idx]["rms_internal_force"] = rms_int_forces[idx]
+
+                        if idx < len(delta_energy):
+                            change_in_energy = re.sub(r"Energy=", "", delta_energy[idx])
+                            properties[idx]["predicted_change_in_energy"] = float(change_in_energy.replace('D', 'E'))
+
+            if GaussianJobType.FREQ in job_types:
+                # thermochemistry is stored on the last structure only; more than one match is treated as an error
+                enthalpies = lines.find_parameter("thermal Enthalpies", expected_length=7, which_field=6)
+                if len(enthalpies) == 1:
+                    properties[-1]["enthalpy"] = enthalpies[0]
+                elif len(enthalpies) > 1:
+                    raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}")
+
+                gibbs_vals = lines.find_parameter("thermal Free Energies", expected_length=8, which_field=7)
+                if len(gibbs_vals) == 1:
+                    properties[-1]["gibbs_free_energy"] = gibbs_vals[0]
+                elif len(gibbs_vals) > 1:
+                    raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}")
+
+                frequencies = []
+                try:
+                    frequencies = sum(lines.find_parameter("Frequencies", expected_length=5, which_field=[2,3,4]), [])
+                    properties[-1]["frequencies"] = sorted(frequencies)
+                except Exception as e:
+                    raise ValueError("error finding frequencies") from e
+
+                #  Temperature   298.150 Kelvin.  Pressure   1.00000 Atm.
+                temperature = lines.find_parameter("Temperature", expected_length=6, which_field=1)
+                if len(temperature) == 1:
+                    properties[-1]["temperature"] = temperature[0]
+                    try:
+                        corrected_free_energy = get_corrected_free_energy(gibbs_vals[0], frequencies, frequency_cutoff=100.0, temperature=temperature[0])
+                        properties[-1]["quasiharmonic_gibbs_free_energy"] = float(f"{float(corrected_free_energy):.6f}") # yes this is dumb
+                    except Exception:
+                        # best effort: the quasiharmonic correction is simply left out when it cannot be computed
+                        pass
+
+
+            if GaussianJobType.NMR in job_types:
+                nmr_shifts = parse.read_nmr_shifts(lines, molecules[0].num_atoms())
+                if nmr_shifts is not None:
+                    properties[-1]["isotropic_shielding"] = nmr_shifts.view(OneIndexedArray)
+
+                if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search("nmr=spinspin", f.route_card,flags=re.IGNORECASE):
+                    couplings = parse.read_j_couplings(lines, molecules[0].num_atoms())
+                    if couplings is not None:
+                        properties[-1]["j_couplings"] = couplings
+
+            if GaussianJobType.FORCE in job_types:
+                assert len(molecules) == 1, "force jobs should not be combined with optimizations!"
+                forces = parse.read_forces(lines)
+                properties[0]["forces"] = forces
+
+            if GaussianJobType.POP in job_types:
+                if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card):
+                    charges, spins = parse.read_hirshfeld_charges(lines)
+                    properties[-1]["hirshfeld_charges"] = charges
+                    properties[-1]["hirshfeld_spins"] = spins
+
+            # Mulliken charges and the dipole moment are optional extras: a parsing failure is deliberately ignored
+            try:
+                charges = parse.read_mulliken_charges(lines)
+                properties[-1]["mulliken_charges"] = charges
+            except Exception:
+                pass
+
+            try:
+                dipole = parse.read_dipole_moment(lines)
+                properties[-1]["dipole_moment"] = dipole
+            except Exception:
+                pass
+
+            for mol, prop in zip(molecules, properties):
+                f.ensemble.add_molecule(mol, properties=prop)
+
+            f.check_has_properties()
+            files.append(f)
+
+        # a single link is unwrapped; files with several Link1 sections yield lists
+        if return_lines:
+            if len(link1_lines) == 1:
+                return files[0], link1_lines[0]
+            else:
+                return files, link1_lines
+        else:
+            if len(link1_lines) == 1:
+                return files[0]
+            else:
+                return files
+
+    @classmethod
+    def _read_gjf_file(cls, filename, return_lines=False):
+        """
+        Reads a Gaussian ``.gjf`` or ``.com`` file and populates the attributes accordingly.
+
+        Sections are consumed in order: Link 0 lines (``%key=value``), route card (first ``#`` line up to the next blank line), title, charge/multiplicity, geometry, footer.
+
+        Args:
+            filename (str): path to the input file
+            return_lines (Bool): whether the lines of the file should be returned
+        Returns:
+            GaussianFile object
+            (optional) the lines of the file
+        """
+        lines = super().read_file(filename)
+        header = None
+        link0 = {}
+        footer = None
+        header_done = False
+        title = None
+        charge = None
+        multip = None
+        in_geom = False
+        atomic_numbers = []
+        geometry = []
+
+        for line in lines:
+            if header is None:
+                if line.startswith("%"):
+                    # split on the first "=" only, so values that themselves contain "=" survive intact
+                    pieces = line[1:].split("=", 1)
+                    link0[pieces[0]] = pieces[1]
+                    continue
+                if line.startswith("#"):
+                    header = line
+                    continue
+
+            if (title is None) and (header is not None):
+                if header_done:
+                    if len(line.strip()) > 0:
+                        title = line
+                else:
+                    if len(line.strip()) > 0:
+                        header = header + line
+                    else:
+                        header_done = True
+                continue
+
+            if (title is not None) and (charge is None):
+                if len(line.strip()) > 0:
+                    # split on any whitespace so tab-separated input is accepted as well
+                    pieces = line.split()
+                    assert len(pieces) == 2, f"can't parse line {line}"
+
+                    charge = int(pieces[0])
+                    multip = int(pieces[1])
+                    in_geom = True
+                    continue
+
+            if in_geom:
+                if len(line.strip()) == 0:
+                    in_geom = False
+                else:
+                    pieces = line.split()
+                    assert len(pieces) == 4, f"can't parse line {line}"
+
+                    atomic_numbers.append(pieces[0])
+                    geometry.append([pieces[1], pieces[2], pieces[3]])
+
+            # everything after the geometry block is collected verbatim as the footer
+            if (not in_geom) and (len(geometry) > 0):
+                if footer:
+                    footer = footer + "\n" + line
+                else:
+                    if len(line.strip()) > 0:
+                        footer = line
+
+        try:
+            atomic_numbers = np.array(atomic_numbers, dtype=np.int8)
+        except Exception:
+            atomic_numbers = np.array(list(map(get_number, atomic_numbers)), dtype=np.int8)
+
+        f = GaussianFile(job_types=cls._assign_job_types(header), route_card=header, link0=link0, footer=footer, title=title)
+        f.ensemble.add_molecule(Molecule(atomic_numbers, geometry, charge=charge, multiplicity=multip))
+        return (f, lines) if return_lines else f
+
+    def get_molecule(self, num=None, properties=False):
+        """
+        Fetches one molecule from ``self.ensemble``, by default the last one
+        (the final structure of an optimization job, or the only structure of other jobs).
+
+        ``num`` indexes ``self.ensemble.molecule_list()``; ``None`` is treated as -1.
+        With ``properties=True`` the return value is the tuple ``(molecule, properties)``.
+        """
+        # callers such as ``write_file`` forward num=None explicitly, so the default is resolved here
+        index = -1 if num is None else num
+        assert isinstance(index, int), "num must be int"
+
+        molecule = self.ensemble.molecule_list()[index]
+        if not properties:
+            return molecule
+        return molecule, self.ensemble.properties_list()[index]
+
+    @classmethod
+    def _assign_job_types(cls, header):
+        """
+        Works out which ``GaussianJobType`` members a route card requests.
+
+        A member is selected when its value, preceded by a space, is found in the route card (case-insensitive regex search).
+        ``GaussianJobType.SP`` is appended whenever it was not selected explicitly, so it is always part of the result.
+
+        Args:
+            header (str): Gaussian route card
+
+        Returns:
+            list of ``GaussianJobType`` objects
+        """
+        route = str(header)
+        # ``__members__`` is walked in its own order, so the result follows that order
+        job_types = [member for member in GaussianJobType.__members__.values() if re.search(f" {member.value}", route, re.IGNORECASE)]
+        if GaussianJobType.SP not in job_types:
+            job_types.append(GaussianJobType.SP)
+        return job_types
+
+    def check_has_properties(self):
+        """
+        Verifies that every property expected for this file's job types is present, and raises ValueError if one is missing.
+
+        Only the last molecule in ``self.ensemble`` is inspected; nothing is checked unless ``self.successful_terminations`` is positive.
+        """
+        if not (self.successful_terminations > 0):
+            return
+        if self.successful_terminations == 1 and (GaussianJobType.OPT in self.job_types) and (GaussianJobType.FREQ in self.job_types):
+            return # opt freq jobs should have two terminations
+
+        for job_type in self.job_types:
+            for prop in EXPECTED_PROPERTIES[job_type.value]:
+                if not self.ensemble.has_property(-1, prop):
+                    raise ValueError(f"expected property {prop} for job type {job_type}, but it's not there!")
+
+    @classmethod
+    def write_ensemble_to_file(cls, filename, ensemble, route_card, link0={"mem": "32GB", "nprocshared": 16}, footer=None, title="title", print_symbol=False):
+        """
+        Write each structure in the specified ensemble to a single Gaussian input file
+        by using the Link1 specification.
+
+        Args:
+            filename (str): where to write the file
+            ensemble (Ensemble): ``Ensemble`` object to write
+            route_card (str or list): to use the same route card for every link, use a single string;
+                                      otherwise, provide a list whose entries parallel the ensemble members
+            link0 (dict or list of dicts): to use the same memory/processors for every link, use a single dict;
+                                           otherwise, provide a list
+            footer (None/str or list): use None for no text after geometry, provide a str to specify a footer,
+                                       or provide some combination of the above as a list
+            title (str or list): use a single string to provide a generic title for every link or a list as above
+            print_symbol (bool or list): whether to print atomic symbols or atomic numbers in the geometry specification;
+                                         use a single bool or a list as above
+
+        Raises:
+            ValueError: if an argument is neither a single value of the documented type nor a list
+        """
+        n_geometries = len(ensemble)
+        assert n_geometries > 0, "cannot write a blank ensemble"
+
+        if isinstance(route_card, str):
+            route_card = [route_card for _ in ensemble._items]
+        elif isinstance(route_card, list):
+            assert len(route_card) == n_geometries, f"expected {n_geometries} route cards but got {len(route_card)}"
+            for card in route_card:
+                assert isinstance(card, str), "expected route card to be a str"
+        else:
+            raise ValueError(f"unexpected type for route_card: {str(type(route_card))}")
+
+        if isinstance(link0, dict):
+            link0 = [link0 for _ in ensemble._items]
+        elif isinstance(link0, list):
+            assert len(link0) == n_geometries, f"expected {n_geometries} link0 entries, but got {len(link0)}"
+            for d in link0:
+                assert isinstance(d, dict), f"expected dict for link0 but got {str(type(d))}"
+        else:
+            raise ValueError(f"unexpected type for link0: {str(type(link0))}")
+
+        if footer is None or isinstance(footer, str):
+            footer = [footer for _ in ensemble._items]
+        elif isinstance(footer, list):
+            assert len(footer) == n_geometries, f"expected {n_geometries} footers, but got {len(footer)}"
+            for f in footer:
+                assert f is None or isinstance(f, str), f"expected str or None for footer but got {str(type(f))}"
+        else:
+            raise ValueError(f"unexpected type for footer: {str(type(footer))}")
+
+        if isinstance(title, str):
+            assert len(title.strip()) > 0, "zero-length titles not allowed"
+            title = [title for _ in ensemble._items]
+        elif isinstance(title, list):
+            assert len(title) == n_geometries, f"expected {n_geometries} titles but got {len(title)}"
+            for entry in title:
+                assert isinstance(entry, str), "expected title to be a str"
+                assert len(entry.strip()) > 0, "zero-length titles are not allowed"
+        else:
+            raise ValueError(f"unexpected type for title: {str(type(title))}")
+
+        if isinstance(print_symbol, bool):
+            print_symbol = [print_symbol for _ in ensemble._items]
+        elif isinstance(print_symbol, list):
+            assert len(print_symbol) == n_geometries, f"expected {n_geometries} print_symbol entries but got {len(print_symbol)}"
+            for s in print_symbol:
+                assert isinstance(s, bool), f"expected bool for print_symbol but got {str(type(s))}"
+        else:
+            raise ValueError(f"unexpected type for print_symbol: {str(type(print_symbol))}")
+
+        for idx, molecule in enumerate(ensemble._items):
+            # the first structure starts the file; every later one is appended to it
+            cls.write_molecule_to_file(filename, molecule, route_card[idx], link0[idx], footer=footer[idx], title=title[idx], print_symbol=print_symbol[idx], append=(idx > 0))
+
+    def add_custom_basis_set(self, name, add_all_elements=False, return_string=False):
+        """
+        Appends custom basis sets (from Basis Set Exchange) to ``self.footer``. Should be used in combination with the ``gen`` keyword.
+
+        Args:
+            name (str): name of basis set (look it up on Basis Set Exchange)
+            add_all_elements (bool): whether the complete basis set should be added or just the elements of interest
+            return_string (bool): if the basis set should be appended to the footer or returned as a string (no change to ``self``)
+
+        Returns:
+            nothing (if return_string is ``False``)
+            string of basis set definition (if return string is ``True``)
+
+        Raises:
+            ValueError: if obtaining or appending the basis set definition fails
+        """
+        import basis_set_exchange as bse
+        assert isinstance(name, str), "need basis set name to be a string, for starters"
+
+        try:
+            if add_all_elements:
+                basis_definition = bse.get_basis(name, fmt="gaussian94", header=False)
+            else:
+                elements = list(np.unique(self.get_molecule().atomic_numbers.view(np.ndarray)))
+                basis_definition = bse.get_basis(name, fmt="gaussian94", header=False, elements=elements)
+
+            if return_string:
+                return basis_definition  # documented contract: ``self.footer`` stays untouched in this mode
+            self.footer = basis_definition if self.footer is None else self.footer + basis_definition
+            self.footer += "\n"
+        except Exception as e:
+            raise ValueError(f"adding basis set {name} from basis set exchange failed!\n{e}") from e
+
+    @classmethod
+    def read_file(cls, filename, return_lines=False, extended_opt_info=False, fail_silently=True):
+        """
+        Parses a Gaussian ``.out`` file (or a ``.gjf``/``.com`` input) into ``GaussianFile`` object(s).
+        Only footers from ``opt=modredundant`` can be read automatically --  ``genecep`` custom basis sets, &c must be specified manually.
+
+        Each Link1 section is parsed by ``parse.read_file_fast``; sections for which it returns ``None`` are left out.
+
+        Note:
+            Will throw ``ValueError`` if there have been no successful iterations.
+            NOTE(review): if every link is dropped (see ``fail_silently``) the ``files[0]`` lookup below raises ``IndexError``,
+            and with ``return_lines`` the two returned lists can differ in length -- confirm the intended behaviour.
+            NOTE(review): an earlier ``read_file`` exists in this file; being defined later, this one presumably takes its place.
+
+        Args:
+            filename (str): path to the out file
+            return_lines (Bool): whether the lines of the file should be returned
+            extended_opt_info (Bool): if full parameters about each opt step should be collected
+                (by default, only ``rms_displacement`` and ``rms_force`` are collected)
+            fail_silently (Bool): if true, files that fail validation will just be omitted and parsing will continue.
+                useful for monitoring jobs which are in-progress and may not have all properties written.
+        Returns:
+            ``GaussianFile`` object (or list of ``GaussianFile`` objects for Link1 files)
+            (optional) the lines of the file (or list of lines of file for Link1 files) as Lines object
+        """
+        if re.search("gjf$", filename) or re.search("com$", filename):
+            return cls._read_gjf_file(filename, return_lines)
+
+        link1_lines = parse.split_link1_to_text(filename)
+        # one parse attempt per Link1 section, in file order
+        parsed = (
+            parse.read_file_fast(text, filename, link_idx, extended_opt_info=extended_opt_info, fail_silently=fail_silently)
+            for link_idx, text in enumerate(link1_lines)
+        )
+        files = [gaussian_file for gaussian_file in parsed if gaussian_file is not None]
+
+        if return_lines:
+            # the text chunks used for parsing are swapped for what ``parse.split_link1`` produces
+            link1_lines = parse.split_link1(filename)
+
+        single_link = len(link1_lines) == 1
+        if return_lines:
+            return (files[0], link1_lines[0]) if single_link else (files, link1_lines)
+        return files[0] if single_link else files
+
+
diff --git a/build/lib/cctk/group.py b/build/lib/cctk/group.py
new file mode 100644
index 0000000..1ca0304
--- /dev/null
+++ b/build/lib/cctk/group.py
@@ -0,0 +1,277 @@
+import copy
+import numpy as np
+import networkx as nx
+
+import cctk
+from cctk.helper_functions import get_covalent_radius, compute_angle_between, compute_rotation_matrix
+
+
+class Group(cctk.Molecule):
+    """
+    Class representing a functional group.
+
+    Note that a Group instance does not need to be missing atoms. Rather, the atom given by `attach_to` will be replaced wholesale by another molecule, and the bond distances scaled automatically.
+
+    Attributes:
+        attach_to (int): atom number to replace with larger fragment. must have only one bond! (e.g. H in F3C-H)
+        adjacent (int): atom number that will be bonded to new molecule. (e.g. C in F3C-H)
+        isomorphic (list of lists): list of lists of atoms that should be considered symmetry equivalent.
+            For instance, the three methyl protons can be considered symmetry equivalent, so ``methane.isomorphic = [[3, 4, 5]]``.
+        _map_from_truncated(dict): a dictionary mapping atom numbers of the group without ``attach_to`` to the atom numbers of the normal group
+    """
+
+    def __init__(self, attach_to, isomorphic=None, **kwargs):
+        """Builds the underlying molecule from ``kwargs``, then registers ``attach_to`` as the attachment point."""
+        super().__init__(**kwargs)
+        self.add_attachment_point(attach_to)
+        self._map_from_truncated = None  # cache, filled on the first call to map_from_truncated()
+
+        assert isomorphic is None or isinstance(isomorphic, list), "group.isomorphic must be list of lists!"
+        self.isomorphic = isomorphic
+
+    @classmethod
+    def new_from_molecule(cls, molecule, attach_to, **kwargs):
+        """
+        Alternate constructor: builds a group from the atomic numbers, geometry, and bond edges of ``molecule``, using ``attach_to`` as the attachment point.
+        """
+        #### instantiate through ``cls`` (not a hard-coded ``Group``) so that subclasses get instances of their own type
+        return cls(attach_to, atomic_numbers=molecule.atomic_numbers, geometry=molecule.geometry, bonds=molecule.bonds.edges(), **kwargs)
+
+    def add_attachment_point(self, attach_to):
+        """
+        Adds ``attach_to`` and ``adjacent`` attributes to the instance.
+
+        Automatically centers atom ``adjacent`` on the origin, to simplify downstream mathematics.
+
+        Args:
+            attach_to (int): number of the atom that will be replaced; must have exactly one adjacent atom
+
+        Raises:
+            ValueError: if ``attach_to`` does not have exactly one adjacent atom
+        """
+        adjacent = super().get_adjacent_atoms(attach_to)  # looked up once and reused below
+        if len(adjacent) != 1:
+            raise ValueError(f"atom {attach_to} is making {len(adjacent)} bonds but must make 1 bond to be a valid attachment point")
+        self.attach_to, self.adjacent = attach_to, adjacent[0]
+        #### shift every atom by minus the vector of ``adjacent``
+        super().translate_molecule(-super().get_vector(self.adjacent))
+
+    @staticmethod
+    def add_group_to_molecule(molecule, group, add_to, optimize=True, return_mapping=False):
+        """
+        Adds a `Group` object to a `Molecule` at the specified atom, and returns a new `Molecule` object (generated using `copy.deepcopy()`).
+        Automatically attempts to prevent clashes by minimizing pairwise atomic distances.
+
+        The atom in `group` that replaces `add_to` in `molecule` will inherit the number of `add_to` - however, the other atoms in `group` will be appended to the atom list.
+
+        Args:
+            molecule (Molecule): the molecule to change
+            group (Group): the group to affix
+            add_to (int): the 1-indexed atom number on `molecule` to add `group` to
+            optimize (bool): whether or not to perform automated dihedral optimization
+            return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products
+
+        Returns:
+            new Molecule object
+
+            (optional) molecule_to_new dictionary mapping atom numbers from starting molecule (key) to new atom numbers (val)
+            (optional) group_to_new dictionary mapping atom numbers from starting group (key) to new atom numbers (val)
+
+        Raises:
+            TypeError: if ``add_to`` cannot be cast to ``int``
+            ValueError: if ``check_for_conflicts()`` returns a falsy value for the finished molecule
+        """
+        #### this code can be a bit complex: for an example, let's imagine converting benzene to toluene by adding methane (Group) to benzene (Molecule)
+        ####     add_to would be the benzene H (atom on Molecule you replace with the new group)
+        ####     adjacent_atom would be the benzene C
+        ####     group.attach_to would be the methane H
+        ####     group.adjacent would be the methane C
+
+        try:
+            add_to = int(add_to)
+        except (TypeError, ValueError, OverflowError):
+            raise TypeError("add_to not castable to int")
+
+        #### prevent in-place modification of molecule - could lead to pernicious errors!
+        molecule = copy.deepcopy(molecule)
+        molecule._check_atom_number(add_to)
+        original_num_atoms = molecule.num_atoms()
+
+        adjacent_atom = molecule.get_adjacent_atoms(add_to)
+        assert len(adjacent_atom) > 0, "can't substitute an atom without an adjacent atom! (are there bonds defined for this molecule? consider calling molecule.assign_connectivity()!)"
+        assert len(adjacent_atom) == 1, "can't substitute an atom with more than one adjacent atom!"
+        adjacent_atom = adjacent_atom[0]
+
+        attach_to = group.attach_to
+        other_indices = np.ones_like(group.atomic_numbers).astype(bool)
+        other_indices[attach_to] = False
+        other_indices[group.adjacent] = False
+
+        #### we need to change the bond length somewhat to prevent strange behavior
+        old_radius = get_covalent_radius(molecule.atomic_numbers[add_to])
+        new_radius = get_covalent_radius(group.atomic_numbers[group.adjacent])
+        delta_rad = new_radius - old_radius
+
+        #### make the swap! (this only adds the atoms, still have to get the geometry right)
+        molecule.atomic_numbers[add_to] = group.atomic_numbers[group.adjacent]
+        new_indices = [i + molecule.num_atoms() for i in range(1, np.sum(other_indices) + 1)]
+        molecule.atomic_numbers = np.hstack([molecule.atomic_numbers, group.atomic_numbers[other_indices]])
+        molecule.atomic_numbers = molecule.atomic_numbers.view(cctk.OneIndexedArray)
+
+        #### have to keep track of what all the new indices are, to carry over connectivity
+        #### (insert the lower group atom number first, so that the second insertion cannot shift the first out of place)
+        for group_atom, new_atom in sorted([(group.adjacent, add_to), (attach_to, adjacent_atom)]):
+            new_indices.insert(group_atom - 1, new_atom)
+
+        #### track atom number mapping (keys are atoms of the starting molecule only, hence the count taken before the group was appended)
+        molecule_to_new = {z : z for z in range(1, original_num_atoms + 1)}
+        molecule_to_new[add_to] = None
+
+        group_to_new = {}
+        offset = 1
+        for z in range(1, group.num_atoms() + 1):
+            if other_indices[z]:
+                group_to_new[z] = original_num_atoms + offset
+                offset += 1
+            else:
+                group_to_new[z] = None
+        group_to_new[group.adjacent] = add_to
+
+        #### adjust the bond length by moving add_to
+        molecule.set_distance(adjacent_atom, add_to, molecule.get_distance(adjacent_atom, add_to) + delta_rad)
+
+        #### rotate group to match the new positioning
+        v_g = group.get_vector(group.attach_to, group.adjacent)
+        v_m = molecule.get_vector(add_to, adjacent_atom)
+        theta = compute_angle_between(v_g, v_m)
+
+        #### rotate each atom and add it...
+        center_pos = molecule.get_vector(add_to)
+        rot = compute_rotation_matrix(np.cross(v_g, v_m), -(180 - theta))
+        for vector in group.geometry[other_indices]:
+            new_v = np.dot(rot, vector) + center_pos
+            molecule.geometry = np.vstack((molecule.geometry, new_v))
+            molecule.geometry = molecule.geometry.view(cctk.OneIndexedArray)
+
+        #### now we have to merge the new bonds
+        for (atom1, atom2) in group.bonds.edges():
+            molecule.add_bond(new_indices[atom1-1], new_indices[atom2-1])
+        assert molecule.get_bond_order(add_to, adjacent_atom), "we didn't add the bond we were supposed to form!"
+
+        assert len(molecule.atomic_numbers) == len(
+            molecule.geometry
+        ), f"molecule has {len(molecule.atomic_numbers)} atoms but {len(molecule.geometry)} geometry elements!"
+
+        #### now we want to find the "lowest" energy conformation, defined as the rotamer which minimizes the RMS distance between all atoms
+        if group.num_atoms() > 3 and optimize:
+            adjacent_on_old_molecule = molecule.get_adjacent_atoms(adjacent_atom)[0]
+            adjacent_on_new_molecule = molecule.get_adjacent_atoms(add_to)[-1]
+            molecule.optimize_dihedral(adjacent_on_old_molecule, adjacent_atom, add_to, adjacent_on_new_molecule)
+
+        if not molecule.check_for_conflicts():
+            raise ValueError("molecule contains conflicts!")
+        if return_mapping:
+            return molecule, molecule_to_new, group_to_new
+        return molecule
+
+    @staticmethod
+    def remove_group_from_molecule(molecule, atom1, atom2, return_mapping=False):
+        """
+        The microscopic reverse of ``add_group_to_molecule`` -- splits a ``Molecule`` along the ``atom1``–``atom2`` bond
+        and returns a new ``Molecule`` object (the ``atom1`` side) and a new ``Group`` (the ``atom2`` side).
+
+        The new objects will be capped with hydrogens; atom ordering will be preserved!
+
+        Args:
+            molecule (Molecule): the molecule to change
+            atom1 (int): the 1-indexed atom number on `molecule` to make part of the new ``Molecule`` object
+            atom2 (int): the 1-indexed atom number on `molecule` to make part of the new ``Group`` object
+            return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products
+
+        Returns:
+            new Molecule object
+            new Group object
+            (optional) molecule_to_molecule dictionary mapping atom numbers from starting molecule (key) to new molecule atom numbers (val)
+            (optional) molecule_to_group dictionary mapping atom numbers from starting molecule (key) to new group atom numbers (val)
+
+        Raises:
+            TypeError: if ``atom1`` or ``atom2`` cannot be cast to ``int``
+        """
+        try:
+            atom1 = int(atom1)
+            atom2 = int(atom2)
+        except (TypeError, ValueError, OverflowError):
+            raise TypeError("atom numbers not castable to int")
+        molecule = copy.deepcopy(molecule)
+        molecule._check_atom_number(atom1)
+        molecule._check_atom_number(atom2)
+
+        #### define mapping dicts
+        fragment1, fragment2 = molecule._get_bond_fragments(atom1, atom2)
+        molecule_to_molecule = {x: i+1 for i, x in enumerate(fragment1)}
+        molecule_to_group = {x: i+1 for i, x in enumerate(fragment2)}
+
+        #### create new molecules
+        new_mol = cctk.Molecule(molecule.atomic_numbers[fragment1], molecule.geometry[fragment1])
+        group = cctk.Molecule(molecule.atomic_numbers[fragment2], molecule.geometry[fragment2])
+
+        #### add capping H to new_mol
+        new_mol.add_atom("H", molecule.geometry[atom2])
+        molecule_to_molecule[atom2] = new_mol.num_atoms()
+        old_radius = get_covalent_radius(molecule.atomic_numbers[atom2])
+        H_radius = get_covalent_radius(1)
+        new_dist = new_mol.get_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2]) - old_radius + H_radius
+        new_mol.set_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2], new_dist)
+        new_mol.add_bond(molecule_to_molecule[atom1], molecule_to_molecule[atom2])
+
+        #### add capping H to new group
+        group.add_atom("H", molecule.geometry[atom1])
+        molecule_to_group[atom1] = group.num_atoms()
+        old_radius = get_covalent_radius(molecule.atomic_numbers[atom1])
+        new_dist = group.get_distance(molecule_to_group[atom2], molecule_to_group[atom1]) - old_radius + H_radius
+        group.set_distance(molecule_to_group[atom2], molecule_to_group[atom1], new_dist)
+        group.add_bond(molecule_to_group[atom2], molecule_to_group[atom1])
+
+        #### add bonds to nascent molecules (with atom1-atom2 removed, every remaining bond must lie entirely within one fragment)
+        molecule.remove_bond(atom1, atom2)
+        for (a1, a2) in molecule.bonds.edges():
+            if a1 in fragment1:
+                assert a2 in fragment1, "somehow we have another bond between the two groups!"
+                assert molecule_to_molecule[a1] is not None, f"we don't have a mapping for atom {a1}"
+                assert molecule_to_molecule[a2] is not None, f"we don't have a mapping for atom {a2}"
+                new_mol.add_bond(molecule_to_molecule[a1], molecule_to_molecule[a2])
+            elif a1 in fragment2:
+                assert a2 in fragment2, "somehow we have another bond between the two groups!"
+                assert molecule_to_group[a1] is not None, f"we don't have a mapping for atom {a1}"
+                assert molecule_to_group[a2] is not None, f"we don't have a mapping for atom {a2}"
+                group.add_bond(molecule_to_group[a1], molecule_to_group[a2])
+
+        #### create Group object from group
+        group = cctk.Group.new_from_molecule(attach_to=molecule_to_group[atom1], molecule=group)
+
+        if return_mapping:
+            return new_mol, group, molecule_to_molecule, molecule_to_group
+        return new_mol, group
+
+    def map_from_truncated(self):
+        """
+        Returns a dictionary mapping atom numbers of the group without ``attach_to`` to atom numbers of the full group (cached after the first successful call).
+        """
+        cached = self._map_from_truncated
+        if cached is not None:
+            return cached
+
+        assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+        full = copy.deepcopy(self)
+        full._add_atomic_numbers_to_nodes()
+        truncated = copy.deepcopy(full)
+        truncated.remove_atom(full.attach_to)
+
+        #### nodes may only be matched when their "atomic_number" attributes agree
+        same_element = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)
+        matcher = nx.algorithms.isomorphism.GraphMatcher(full.bonds, truncated.bonds, node_match=same_element)
+        for candidate in matcher.subgraph_isomorphisms_iter():
+            if self.attach_to not in candidate:
+                #### first match that leaves ``attach_to`` unmapped: invert it so the truncated numbers become the keys
+                self._map_from_truncated = {trunc: orig for orig, trunc in candidate.items()}
+                return self._map_from_truncated
diff --git a/build/lib/cctk/groups/AcH.mol2 b/build/lib/cctk/groups/AcH.mol2
new file mode 100644
index 0000000..190bb76
--- /dev/null
+++ b/build/lib/cctk/groups/AcH.mol2
@@ -0,0 +1,29 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+7 6
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -1.0756    -1.3886     0.0424 C
+2 H2    -1.6678    -0.4492     0.0405 H
+3 O3     0.1828    -1.3886     0.0445 O
+4 C4    -1.8969    -2.6913     0.0425 C
+5 H5    -2.9404    -2.4551     0.0428 H
+6 H6    -1.6605    -3.2620    -0.8312 H
+7 H7    -1.6602    -3.2621     0.9161 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 2
+3 1 4 1
+4 4 5 1
+5 4 6 1
+6 4 7 1
diff --git a/build/lib/cctk/groups/BrH.mol2 b/build/lib/cctk/groups/BrH.mol2
new file mode 100644
index 0000000..cab2100
--- /dev/null
+++ b/build/lib/cctk/groups/BrH.mol2
@@ -0,0 +1,19 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+2 1
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 Br1     0.2861     0.4087     0.0000 Br
+2 H2    -1.1539     0.4087     0.0000 H
+@<TRIPOS>BOND
+1 1 2 1
diff --git a/build/lib/cctk/groups/CF3H.mol2 b/build/lib/cctk/groups/CF3H.mol2
new file mode 100644
index 0000000..170ba6b
--- /dev/null
+++ b/build/lib/cctk/groups/CF3H.mol2
@@ -0,0 +1,25 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+5 4
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -0.2897     0.6009     0.0000 C
+2 H2     0.0670    -0.4080     0.0000 H
+3 F3    -1.6397     0.6009     0.0000 F
+4 F4     0.1604     1.2372    -1.1023 F
+5 F5     0.1604     1.2372     1.1023 F
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 4 1
+4 1 5 1
diff --git a/build/lib/cctk/groups/CHOH.mol2 b/build/lib/cctk/groups/CHOH.mol2
new file mode 100644
index 0000000..714fd2c
--- /dev/null
+++ b/build/lib/cctk/groups/CHOH.mol2
@@ -0,0 +1,23 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+4 3
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -1.0756    -1.3886     0.0424 C
+2 H2    -1.6678    -0.4492     0.0424 H
+3 H3    -1.6678    -2.3280     0.0425 H
+4 O4     0.1828    -1.3886     0.0424 O
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 4 2
diff --git a/build/lib/cctk/groups/ClH.mol2 b/build/lib/cctk/groups/ClH.mol2
new file mode 100644
index 0000000..924cf5f
--- /dev/null
+++ b/build/lib/cctk/groups/ClH.mol2
@@ -0,0 +1,19 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+2 1
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 Cl1     0.2861     0.4087     0.0000 Cl
+2 H2    -1.0039     0.4087     0.0000 H
+@<TRIPOS>BOND
+1 1 2 1
diff --git a/build/lib/cctk/groups/EtH.mol2 b/build/lib/cctk/groups/EtH.mol2
new file mode 100644
index 0000000..5556704
--- /dev/null
+++ b/build/lib/cctk/groups/EtH.mol2
@@ -0,0 +1,31 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+8 7
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -0.2897     0.6009     0.0000 C
+2 H2     0.0670    -0.4080     0.0000 H
+3 H3     0.0670     1.1053    -0.8737 H
+4 H4    -1.3597     0.6009     0.0000 H
+5 C5     0.2237     1.3268     1.2574 C
+6 H6    -0.1314     2.3362     1.2565 H
+7 H7    -0.1346     0.8235     2.1311 H
+8 H8     1.2937     1.3251     1.2584 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 4 1
+4 1 5 1
+5 5 6 1
+6 5 7 1
+7 5 8 1
diff --git a/build/lib/cctk/groups/FH.mol2 b/build/lib/cctk/groups/FH.mol2
new file mode 100644
index 0000000..4f303aa
--- /dev/null
+++ b/build/lib/cctk/groups/FH.mol2
@@ -0,0 +1,19 @@
+# Title
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+2 1
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 F1     0.2861     0.4087     0.0000 F
+2 H2    -0.5939     0.4087     0.0000 H
+@<TRIPOS>BOND
+1 1 2 1
diff --git a/build/lib/cctk/groups/HCN.mol2 b/build/lib/cctk/groups/HCN.mol2
new file mode 100644
index 0000000..779ebd6
--- /dev/null
+++ b/build/lib/cctk/groups/HCN.mol2
@@ -0,0 +1,21 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+3 2
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -0.2764     0.5632     0.0000 C
+2 H2    -1.3454     0.5632     0.0000 H
+3 N3     0.8702     0.5632     0.0000 N
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 3
diff --git a/build/lib/cctk/groups/HCO2Me.mol2 b/build/lib/cctk/groups/HCO2Me.mol2
new file mode 100644
index 0000000..993ced6
--- /dev/null
+++ b/build/lib/cctk/groups/HCO2Me.mol2
@@ -0,0 +1,31 @@
+# Title
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+8 7
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -0.5889    -0.6103     0.0000 C
+2 H2    -1.1825     0.3138     0.0000 H
+3 O3     0.6695    -0.6103     0.0000 O
+4 O4    -1.3618    -1.8134    -0.0000 O
+5 C5    -0.7063    -3.0843    -0.0024 C
+6 H6    -0.0937    -3.1713     0.8705 H
+7 H7    -1.4394    -3.8637    -0.0028 H
+8 H8    -0.0954    -3.1689    -0.8768 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 2
+3 1 4 1
+4 4 5 1
+5 5 6 1
+6 5 7 1
+7 5 8 1
diff --git a/build/lib/cctk/groups/HNO2.mol2 b/build/lib/cctk/groups/HNO2.mol2
new file mode 100644
index 0000000..fe6ece2
--- /dev/null
+++ b/build/lib/cctk/groups/HNO2.mol2
@@ -0,0 +1,23 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+4 3
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 N1    -0.2764     0.5632     0.0000 N
+2 H2    -0.7519    -0.3165    -0.0000 H
+3 O3    -0.9543     1.5986     0.0000 O
+4 O4     0.9612     0.5632     0.0000 O
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 Ar
+3 1 4 Ar
diff --git a/build/lib/cctk/groups/IH.mol2 b/build/lib/cctk/groups/IH.mol2
new file mode 100644
index 0000000..d94917f
--- /dev/null
+++ b/build/lib/cctk/groups/IH.mol2
@@ -0,0 +1,19 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+2 1
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 I1     0.2861     0.4087     0.0000 I
+2 H2    -1.3439     0.4087     0.0000 H
+@<TRIPOS>BOND
+1 1 2 1
diff --git a/build/lib/cctk/groups/MeH.mol2 b/build/lib/cctk/groups/MeH.mol2
new file mode 100644
index 0000000..3da4b48
--- /dev/null
+++ b/build/lib/cctk/groups/MeH.mol2
@@ -0,0 +1,25 @@
+# Title
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+5 4
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -0.2897     0.6009     0.0000 C
+2 H2     0.0670    -0.4080     0.0000 H
+3 H3     0.0670     1.1053     0.8737 H
+4 H4     0.0670     1.1053    -0.8737 H
+5 H5    -1.3597     0.6009     0.0000 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 4 1
+4 1 5 1
diff --git a/build/lib/cctk/groups/NH3.mol2 b/build/lib/cctk/groups/NH3.mol2
new file mode 100644
index 0000000..69f5ff0
--- /dev/null
+++ b/build/lib/cctk/groups/NH3.mol2
@@ -0,0 +1,23 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+4 3
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 N1    -0.2897     0.6009     0.0000 N
+2 H2     0.0436    -0.3419     0.0000 H
+3 H3     0.0436     1.0723     0.8165 H
+4 H4     0.0436     1.0723    -0.8165 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 4 1
diff --git a/build/lib/cctk/groups/NHAcH.mol2 b/build/lib/cctk/groups/NHAcH.mol2
new file mode 100644
index 0000000..4276eae
--- /dev/null
+++ b/build/lib/cctk/groups/NHAcH.mol2
@@ -0,0 +1,33 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+9 8
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 N1    -0.2764     0.5632     0.0000 N
+2 H2     0.7069     0.3810    -0.0017 H
+3 C3    -2.3071     1.9480     0.0000 C
+4 H4    -2.6637     1.8678     1.0056 H
+5 H5    -2.6643     2.8587    -0.4335 H
+6 H6    -2.6633     1.1170    -0.5721 H
+7 C7    -0.7671     1.9489     0.0000 C
+8 O8    -0.0196     3.0120    -0.0656 O
+9 H9    -0.9258    -0.1972     0.0017 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 7 1
+3 1 9 1
+4 3 4 1
+5 3 5 1
+6 3 6 1
+7 3 7 1
+8 7 8 Ar
diff --git a/build/lib/cctk/groups/NMe2H.mol2 b/build/lib/cctk/groups/NMe2H.mol2
new file mode 100644
index 0000000..4128bc5
--- /dev/null
+++ b/build/lib/cctk/groups/NMe2H.mol2
@@ -0,0 +1,35 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+10 9
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 N1    -0.2764     0.5632     0.0000 N
+2 H2     0.7236     0.5638    -0.0018 H
+3 C3    -0.7638    -0.1292     1.2016 C
+4 H4    -1.8338    -0.1294     1.2038 H
+5 H5    -0.4054     0.3757     2.0743 H
+6 H6    -0.4070    -1.1380     1.2014 H
+7 C7    -0.7671     1.9489     0.0000 C
+8 H8    -0.4122     2.4529    -0.8746 H
+9 H9    -1.8371     1.9483     0.0019 H
+10 H10    -0.4091     2.4540     0.8727 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 7 1
+4 3 4 1
+5 3 5 1
+6 3 6 1
+7 7 8 1
+8 7 9 1
+9 7 10 1
diff --git a/build/lib/cctk/groups/OH2.mol2 b/build/lib/cctk/groups/OH2.mol2
new file mode 100644
index 0000000..7960d76
--- /dev/null
+++ b/build/lib/cctk/groups/OH2.mol2
@@ -0,0 +1,21 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+3 2
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 O1    -0.2897     0.6009     0.0000 O
+2 H2     0.6703     0.6009     0.0000 H
+3 H3    -0.6102     1.5058     0.0000 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
diff --git a/build/lib/cctk/groups/OMeH.mol2 b/build/lib/cctk/groups/OMeH.mol2
new file mode 100644
index 0000000..15f674b
--- /dev/null
+++ b/build/lib/cctk/groups/OMeH.mol2
@@ -0,0 +1,27 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+6 5
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 O1    -0.2897     0.6009     0.0000 O
+2 H2     0.6703     0.6009    -0.0018 H
+3 C3    -0.7671     1.9489     0.0000 C
+4 H4    -0.4107     2.4535    -0.8737 H
+5 H5    -1.8371     1.9483     0.0000 H
+6 H6    -0.4107     2.4535     0.8737 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 3 4 1
+4 3 5 1
+5 3 6 1
diff --git a/build/lib/cctk/groups/SF5H.mol2 b/build/lib/cctk/groups/SF5H.mol2
new file mode 100644
index 0000000..63f3905
--- /dev/null
+++ b/build/lib/cctk/groups/SF5H.mol2
@@ -0,0 +1,29 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+7 6
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 S1     0.2861     0.4087     0.0000 S
+2 H2     1.5961     0.4087     0.0000 H
+3 F3     0.2861    -1.1813     0.0000 F
+4 F4    -1.3039     0.4087     0.0000 F
+5 F5     0.2861     0.4087     1.5900 F
+6 F6     0.2861     0.4087    -1.5900 F
+7 F7     0.2861     1.9987     0.0000 F
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 4 1
+4 1 5 1
+5 1 6 1
+6 1 7 1
diff --git a/build/lib/cctk/groups/SO3HH.mol2 b/build/lib/cctk/groups/SO3HH.mol2
new file mode 100644
index 0000000..d76b8bb
--- /dev/null
+++ b/build/lib/cctk/groups/SO3HH.mol2
@@ -0,0 +1,27 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+6 5
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 S1     0.2861     0.4087     0.0000 S
+2 H2     1.4395     0.8503    -0.4367 H
+3 O3     0.0384    -1.1462    -0.5567 O
+4 O4     0.2861     0.4087     1.6700 O
+5 O5    -0.9366     1.4006    -0.5567 O
+6 H6    -0.5604     2.1046    -1.0901 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 4 1
+4 1 5 1
+5 5 6 1
diff --git a/build/lib/cctk/groups/__init__.py b/build/lib/cctk/groups/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build/lib/cctk/groups/iPrH.mol2 b/build/lib/cctk/groups/iPrH.mol2
new file mode 100644
index 0000000..6f72d60
--- /dev/null
+++ b/build/lib/cctk/groups/iPrH.mol2
@@ -0,0 +1,37 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+11 10
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -0.2897     0.6009     0.0000 C
+2 H2     0.0670    -0.4080     0.0000 H
+3 H3     0.0670     1.1053    -0.8737 H
+4 C4     0.2237     1.3268     1.2574 C
+5 H5    -0.1314     2.3362     1.2565 H
+6 H6    -0.1346     0.8235     2.1311 H
+7 H7     1.2937     1.3251     1.2584 H
+8 C8    -1.8297     0.6009     0.0000 C
+9 H9    -2.1864     1.6097     0.0004 H
+10 H10    -2.1864     0.0968    -0.8739 H
+11 H11    -2.1864     0.0961     0.8734 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 4 1
+4 1 8 1
+5 4 5 1
+6 4 6 1
+7 4 7 1
+8 8 9 1
+9 8 10 1
+10 8 11 1
diff --git a/build/lib/cctk/groups/tBuH.mol2 b/build/lib/cctk/groups/tBuH.mol2
new file mode 100644
index 0000000..1540153
--- /dev/null
+++ b/build/lib/cctk/groups/tBuH.mol2
@@ -0,0 +1,43 @@
+# Molecule Name
+# Created by GaussView 6.0.16
+#
+
+#
+#
+
+@<TRIPOS>MOLECULE
+Molecule Name
+14 13
+SMALL
+NO_CHARGES
+
+
+@<TRIPOS>ATOM
+1 C1    -0.2897     0.6009     0.0000 C
+2 H2     0.0670    -0.4080     0.0000 H
+3 C3     0.2237     1.3268     1.2574 C
+4 H4    -0.1314     2.3362     1.2565 H
+5 H5    -0.1346     0.8235     2.1311 H
+6 H6     1.2937     1.3251     1.2584 H
+7 C7    -1.8297     0.6009     0.0000 C
+8 H8    -2.1864     1.6097     0.0004 H
+9 H9    -2.1864     0.0968    -0.8739 H
+10 H10    -2.1864     0.0961     0.8734 H
+11 C11     0.2237     1.3268    -1.2574 C
+12 H12     1.2937     1.3270    -1.2573 H
+13 H13    -0.1328     0.8223    -2.1311 H
+14 H14    -0.1332     2.3356    -1.2576 H
+@<TRIPOS>BOND
+1 1 2 1
+2 1 3 1
+3 1 7 1
+4 1 11 1
+5 3 4 1
+6 3 5 1
+7 3 6 1
+8 7 8 1
+9 7 9 1
+10 7 10 1
+11 11 12 1
+12 11 13 1
+13 11 14 1
diff --git a/build/lib/cctk/helper_functions.py b/build/lib/cctk/helper_functions.py
new file mode 100644
index 0000000..0431dc6
--- /dev/null
+++ b/build/lib/cctk/helper_functions.py
@@ -0,0 +1,708 @@
+"""
+Miscellaneous helper functions.
+"""
+
+import numpy as np
+import math, re
+from io import BytesIO
+
+#### python 3.6 or earlier doesn't have importlib.resources, but it's backported as importlib_resources
+try:
+    import importlib.resources as pkg_resources
+except ImportError:
+    import importlib_resources as pkg_resources
+
+from . import data  # relative-import the *package* containing the templates
+import cctk
+
+"""
+This code populates ELEMENT_DICTIONARY and ISOTOPE_DICTIONARY from a static datafile.
+"""
+ELEMENT_DICTIONARY = {}
+ISOTOPE_DICTIONARY = {}
+
+with pkg_resources.open_text(data, "isotopes.csv") as isotope_file:
+    # Each data row is "symbol,atomic number,isotope mass,abundance". The atomic
+    # number is kept as the string read from the file, because the lookup helpers
+    # in this module convert their argument with ``str()`` before indexing.
+    for line in isotope_file:
+        symbol, number, mass, abundance = line.split(",")
+        if symbol == "Symbol":
+            # header row
+            continue
+
+        ELEMENT_DICTIONARY[number] = symbol
+
+        # Collect the isotopes of each element under its string atomic number.
+        # Keying on ``number`` directly (rather than tracking a "previous" number
+        # seeded with the int 1, which never equals the string "1") avoids leaving
+        # a stray, empty ``ISOTOPE_DICTIONARY[1]`` entry behind.
+        isotopes = ISOTOPE_DICTIONARY.setdefault(number, {})
+        isotopes[float(mass)] = float(abundance.rstrip())
+
+    # atomic number "0" maps to the placeholder symbol "Bq"; no isotopes are added for it
+    ELEMENT_DICTIONARY["0"] = "Bq"
+
+INV_ELEMENT_DICTIONARY = {v: int(k) for k, v in ELEMENT_DICTIONARY.items()}
+
+def get_symbol(atomic_number):
+    """
+    Looks up the element symbol for an atomic number.
+
+    Args:
+        atomic_number (int): atomic number; it is converted with ``str()`` before the lookup
+
+    Returns:
+        the symbol stored in ``ELEMENT_DICTIONARY`` (``ValueError`` is raised for unknown numbers)
+    """
+    # the dictionary is keyed by strings, so normalize the argument first
+    key = str(atomic_number)
+    if key not in ELEMENT_DICTIONARY:
+        raise ValueError(f"unknown atomic number: '{key}'")
+    return ELEMENT_DICTIONARY[key]
+
+
+def get_number(atomic_symbol):
+    """
+    Gets atomic number from a given element symbol (converted to titlecase using ``string.title()``).
+
+    Args:
+        atomic_symbol (str): the element symbol, in any capitalization
+
+    Returns:
+        the atomic number (int); ``ValueError`` is raised if the symbol is unknown
+    """
+    symbol = atomic_symbol.title()
+    if symbol in INV_ELEMENT_DICTIONARY:
+        return int(INV_ELEMENT_DICTIONARY[symbol])
+    raise ValueError(f"unknown atomic symbol: '{atomic_symbol}'")
+
+
+"""
+This code populates COVALENT_RADII_DICTIONARY from a static datafile.
+"""
+COVALENT_RADII_DICTIONARY = {}
+with pkg_resources.open_text(data, "covalent_radii.csv") as covalent_radii:
+    for line in covalent_radii:
+        line_fragments = line.split(",")
+
+        #### Field count varies from row to row, but the first three are always number, symbol, radius.
+        #### Every row except the header contributes one entry; the radius is stored as a string.
+        if line_fragments[1] != "Symbol":
+            COVALENT_RADII_DICTIONARY[line_fragments[0]] = line_fragments[2]
+
+def get_covalent_radius(atomic_number):
+    """
+    Gets the covalent radius for a given element.
+
+    Args:
+        atomic_number (int): the number of the given element
+
+    Returns:
+        the covalent radius in Angstroms (float)
+    Raises:
+        ValueError: if no covalent radius is stored for this atomic number
+    """
+    atomic_number = str(atomic_number)  # the dictionary is keyed by strings
+    if atomic_number not in COVALENT_RADII_DICTIONARY:
+        raise ValueError(f"no covalent radius defined for atomic number {atomic_number}")
+    return float(COVALENT_RADII_DICTIONARY[atomic_number])
+
+"""
+This code populates VDW_RADII_DICTIONARY from a static datafile.
+"""
+VDW_RADII_DICTIONARY = {}
+with pkg_resources.open_text(data, "vdw_radii.csv") as vdw_radii:
+    for line in vdw_radii:
+        line_fragments = line.split(",")
+
+        #### field 0 is the key and field 1 the stored radius string. NOTE(review): no header row is skipped here -- confirm vdw_radii.csv has none
+        VDW_RADII_DICTIONARY[line_fragments[0]] = line_fragments[1]
+
+def get_vdw_radius(atomic_number):
+    """
+    Gets the van der Waals radius for a given element.
+
+    Args:
+        atomic_number (int): the number of the given element
+
+    Returns:
+        the van der Waals radius in Angstroms (float)
+    Raises:
+        ValueError: if no van der Waals radius is stored for this atomic number
+    """
+    atomic_number = str(atomic_number)  # the dictionary is keyed by strings
+    if atomic_number not in VDW_RADII_DICTIONARY:
+        raise ValueError(f"no van der Waals radius defined for atomic number {atomic_number}")
+    return float(VDW_RADII_DICTIONARY[atomic_number])
+
+def compute_distance_between(v1, v2, _norm=np.linalg.norm):
+    """
+    Returns the L2 norm of the difference ``v1 - v2``.
+    (``_norm`` is bound once, as a default argument, so repeated calls avoid looking the function up each time)
+    """
+    difference = v1 - v2
+    return _norm(difference)
+
+
+def compute_unit_vector(vector):
+    """
+    Scales ``vector`` to unit length, keeping its direction.
+    A vector whose norm is zero is handed back unchanged.
+    """
+    length = np.linalg.norm(vector)
+    # guard against dividing by zero
+    if length != 0:
+        return vector / length
+    return vector
+
+
+def compute_angle_between(v1, v2, unit="degree"):
+    """
+    Computes the angle between two vectors from the dot product of their unit vectors.
+
+    Args:
+        v1 (ndarray): first vector
+        v2 (ndarray): second vector
+        unit (str): 'degree' or 'radian'
+
+    Returns:
+        the angle between the two vectors, in the requested unit
+    """
+    cosine = np.dot(compute_unit_vector(v1), compute_unit_vector(v2))
+
+    # clipping keeps round-off from pushing the cosine outside the domain of arccos
+    angle = np.arccos(np.clip(cosine, -1.0, 1.0))
+    if unit == "radian":
+        return angle % (2 * math.pi)
+    if unit == "degree":
+        return np.degrees(angle) % 360
+    raise ValueError(f"invalid unit {unit}: must be 'degree' or 'radian'!")
+
+
+def compute_dihedral_between(p0, p1, p2, p3, unit="degree"):
+    """
+    Computes the dihedral angle defined by four points (p0-p1-p2-p3), about the p1-p2 axis.
+
+    Args:
+        p0, p1, p2, p3 (ndarray): the four points
+        unit (str): 'degree' or 'radian'
+
+    Returns:
+        the dihedral angle, wrapped into [0, 360) degrees or [0, 2*pi) radians
+    """
+    # bond vectors: the first points from p1 back to p0, the last runs from p2 on to p3
+    back = -1.0 * (p1 - p0)
+    forward = p3 - p2
+
+    # unit vector along the central bond, so its length does not scale the projections
+    axis = compute_unit_vector(p2 - p1)
+
+    # strip the components along the axis, leaving the projections onto the plane perpendicular to it
+    v = back - np.dot(back, axis) * axis
+    w = forward - np.dot(forward, axis) * axis
+
+    # the torsion is the in-plane angle from v to w; v and w need not be normalized because
+    # arctan2 only depends on the ratio of its two arguments
+    angle = np.arctan2(np.dot(np.cross(axis, v), w), np.dot(v, w))
+
+    if unit == "radian":
+        return angle % (2 * math.pi)
+    if unit == "degree":
+        return np.degrees(angle) % 360
+    raise ValueError(f"invalid unit {unit}: must be 'degree' or 'radian'!")
+
+
+def compute_rotation_matrix(axis, theta):
+    """
+    Return the rotation matrix for rotation around ``axis`` by ``theta`` degrees.
+    Adapted from user "unutbu" on StackExchange.
+
+    Args:
+        axis (np.ndarray): the vector to rotate about (3 elements; normalized internally)
+        theta (float): how much to rotate (in degrees)
+
+    Returns:
+        the 3x3 rotation matrix (``TypeError`` is raised for an invalid ``axis`` or ``theta``)
+    """
+    if (not isinstance(axis, np.ndarray)) or (len(axis) != 3):
+        raise TypeError("axis must be np array with 3 elements")
+
+    # only conversion failures mean "not a float"; a bare ``except`` would also swallow KeyboardInterrupt etc.
+    try:
+        theta = float(theta)
+    except (TypeError, ValueError, OverflowError):
+        raise TypeError("theta must be float!")
+    theta = np.radians(theta)
+    axis = compute_unit_vector(axis)
+
+    # Euler-Rodrigues parameters: a = cos(theta/2), (b, c, d) = -axis * sin(theta/2)
+    a = math.cos(theta / 2.0)
+    b, c, d = -axis * math.sin(theta / 2.0)
+    aa, bb, cc, dd = a * a, b * b, c * c, d * d
+    bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
+    return np.array(
+        [
+            [aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
+            [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
+            [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc],
+        ]
+    )
+
+def align_matrices(P_partial, P_full, Q_partial, return_matrix=False):
+    """
+    Rotates one set of points onto another using the Kabsch algorithm.
+    The rotation that best aligns P_partial into Q_partial will be found and then applied to P_full.
+    No centering is done here: the rotation is about the origin of the coordinates as given.
+
+    Args:
+        P_partial (matrix): atoms of P that correspond to Q_partial (n x 3)
+        P_full (matrix): full matrix to rotate (m x 3)
+        Q_partial (matrix): matrix to align to (same shape as P_partial)
+        return_matrix (bool): if True, return the 3x3 rotation matrix instead of applying it
+
+    Returns:
+        ``P_full @ rotation``, or ``rotation`` itself if ``return_matrix`` is True
+    """
+    assert np.shape(P_partial) == np.shape(Q_partial)
+
+    # SVD of the covariance matrix between the two point sets
+    U, S, Vt = np.linalg.svd(P_partial.T @ Q_partial)
+
+    # flip the last singular direction if needed so that the result is a proper rotation (det = +1)
+    middle = np.identity(3)
+    if np.linalg.det(Vt.T @ U.T) < 0.0:
+        middle[2][2] = -1.0
+
+    rotation = U @ middle @ Vt
+    return rotation if return_matrix else P_full @ rotation
+
+def compute_RMSD(geometry1, geometry2, checks=True):
+    """
+    Computes the root-mean-square deviation between two geometries, atom by atom.
+    The coordinates are compared as given (nothing in this function aligns them first).
+
+    Args:
+        geometry1 (cctk.OneIndexedArray, n atoms x 3): first geometry
+        geometry2 (cctk.OneIndexedArray, n atoms x 3): second geometry
+        checks (bool): whether to validate the types and lengths of the inputs (True by default)
+
+    Returns:
+        the root-mean-square distance between the two geometries
+    """
+    if checks:
+        for geometry in (geometry1, geometry2):
+            if not isinstance(geometry, cctk.OneIndexedArray):
+                raise ValueError(f"expected cctk.OneIndexedArray but got {str(type(geometry))} instead")
+        if len(geometry2) != len(geometry1):
+            raise ValueError("can't compare two geometries with different lengths!")
+    displacement = geometry1.view(np.ndarray) - geometry2.view(np.ndarray)
+    return np.sqrt(np.sum(displacement ** 2) / len(geometry1))
+
+def get_isotopic_distribution(z):
+    """
+    Returns the isotope masses and their weights for the element with atomic number ``z``.
+
+    Args:
+        z (int): atomic number (converted with ``str()`` to index ``ISOTOPE_DICTIONARY``)
+
+    Returns:
+        masses (np.ndarray): isotope masses
+        weights (np.ndarray): the weight stored for each mass, in the same order
+    """
+    isotopes = ISOTOPE_DICTIONARY[str(z)]
+    masses = np.array(list(isotopes.keys()))
+    weights = np.array(list(isotopes.values()))
+    return masses, weights
+
+def get_avg_mass(z):
+    """
+    Returns the weighted sum of isotope masses (masses dotted with weights) for element ``z``.
+    """
+    isotope_masses, isotope_weights = get_isotopic_distribution(z)
+    return np.dot(isotope_masses, isotope_weights)
+
+def get_z_from_mass(desired_mass, tolerance=0.001):
+    """
+    Finds the atomic number of the first element whose average mass is within ``tolerance`` of ``desired_mass``.
+    Elements whose average mass is zero are skipped.
+
+    Returns ``None`` if no mass within ``tolerance`` is found.
+    """
+    for key in ISOTOPE_DICTIONARY:
+        atomic_number = int(key)
+        average = get_avg_mass(atomic_number)
+        # a zero average gives nothing meaningful to compare against
+        if average != 0 and abs(desired_mass - average) < tolerance:
+            return atomic_number
+    return None
+
+def draw_isotopologue(z):
+    """
+    Draws a random isotope mass for element ``z``, using the stored isotope weights as probabilities
+    (so will return 12 99% of the time and 13 1% of the time for carbon).
+    """
+    isotope_masses, probabilities = get_isotopic_distribution(str(z))
+    return np.random.choice(isotope_masses, p=probabilities)
+
+# dict: atomic symbol --> (slope, intercept)
+# defines the slope to be positive
+DEFAULT_NMR_SCALING_FACTORS = {
+        "H" : (1.0716,  31.6660),
+        "C" : (1.0300, 180.4300),
+        "N" : (0.9776, 244.5626)
+}
+
+def scale_nmr_shifts(ensemble, symmetrical_atom_numbers=None, scaling_factors="default", property_name="isotropic_shielding"):
+    """
+    Apply linear scaling to isotropic shieldings to get chemical shifts.
+    Shifts are calculated as (intercept-shielding)/slope.
+    If there are no shifts available for a structure, None will be placed in both
+    return lists.
+
+    Args:
+        ensemble: an ``Ensemble`` with calculated nmr shifts
+        symmetrical_atom_numbers: None to perform no symmetry-averaging, or a list of lists
+                                  of 1-indexed atom numbers (e.g. [ [2,4,5], [7,8] ]). The
+                                  shieldings in each group are averaged and reported once.
+                                  NOTE(review): the same groups are applied to every molecule;
+                                  per-molecule (triply-nested) lists are not handled below.
+        scaling_factors: "default" to use DEFAULT_NMR_SCALING_FACTORS or a dict
+                         (atomic symbol --> (slope,intercept)).  Elements for
+                         which scaling factors are not provided will be ignored.
+        property_name:   the key in properties_dict to use to locate the predicted
+                         isotropic shieldings (default="isotropic_shielding")
+
+    Returns:
+        scaled_shifts: np.array (matching the shape of the original shieldings minus symmetry averaging)
+        shift_labels: np.array (also matches shape)
+    """
+    # check inputs
+    assert isinstance(ensemble, cctk.Ensemble), f"expected Ensemble but got {str(type(ensemble))} instead"
+    assert len(ensemble) > 0, "empty ensemble not allowed"
+    if symmetrical_atom_numbers is None:
+        symmetrical_atom_numbers = []
+    assert isinstance(symmetrical_atom_numbers, list), f"symmetrical atom numbers should be specified as a list of lists, but got {str(type(symmetrical_atom_numbers))} instead"
+    for group in symmetrical_atom_numbers:
+        assert isinstance(group, list), f"symmetrical atom numbers must be specified as lists, but got {str(type(group))} instead: {str(group)}"
+    if scaling_factors == "default":
+        scaling_factors = DEFAULT_NMR_SCALING_FACTORS
+    else:
+        assert isinstance(scaling_factors, dict)
+        assert len(scaling_factors) > 0, "must provide scaling factors"
+    assert isinstance(property_name, str) and len(property_name)>0, f"property_name {property_name} is invalid"
+
+    # get shieldings and scale
+    all_scaled_shifts = []
+    all_shift_labels = []
+    for molecule, properties in ensemble.items():
+        if property_name in properties:
+            # get atom numbers and atomic elements as OneIndexedArrays
+            atomic_numbers = molecule.atomic_numbers
+            n_atoms = len(atomic_numbers)
+            atomic_symbols = [ get_symbol(n) for n in atomic_numbers ]
+            atomic_symbols = cctk.OneIndexedArray(atomic_symbols)
+            atom_numbers = list(range(1,n_atoms+1))
+            # labels are the element symbol followed by the 1-indexed atom number, e.g. "C1", "H2"
+            all_labels = [ f"{current_symbol}{atom_number}" for current_symbol,atom_number in zip(atomic_symbols,atom_numbers) ]
+            all_labels = cctk.OneIndexedArray(all_labels)
+
+            # check symmetrical atom numbers make sense
+            # (each group needs at least two distinct, in-range atoms of one element that has scaling factors)
+            symmetrical_groups_dict = {}    # symbol --> [ [list1], [list2], ...] where each list is a group of symmetrical atom numbers
+            symmetrical_groups_dict2 = {}   # symbol --> [ union of all symmetrical atom numbers for this symbol ]
+            # (dict2 is what lets the "unique atoms" pass below skip atoms that belong to any group)
+            for symmetrical_group in symmetrical_atom_numbers:
+                assert len(symmetrical_group) > 1, "must be at least 2 symmetrical nuclei in a group"
+                assert len(symmetrical_group) == len(set(symmetrical_group)), f"check for duplicate atom numbers in {symmetrical_group}"
+                symmetrical_symbol = None
+                for atom_number in symmetrical_group:
+                    assert 1 <= atom_number <= n_atoms, f"atom number {atom_number} is out of range"
+                    if symmetrical_symbol is None:
+                        symmetrical_symbol = atomic_symbols[atom_number]
+                        assert symmetrical_symbol in scaling_factors, f"no scaling factors available for the element {symmetrical_symbol}"
+                    assert atomic_symbols[atom_number] == symmetrical_symbol,\
+                           (f"all atoms in a symmetrical group must correspond to the same element\n"
+                            f"expected element {symmetrical_symbol} for atom {atom_number}, "
+                            f"but got element {atomic_symbols[atom_number]}")
+                if symmetrical_symbol not in symmetrical_groups_dict:
+                    symmetrical_groups_dict[symmetrical_symbol] = []
+                symmetrical_groups_dict[symmetrical_symbol].append(symmetrical_group)
+                if symmetrical_symbol not in symmetrical_groups_dict2:
+                    symmetrical_groups_dict2[symmetrical_symbol] = []
+                symmetrical_groups_dict2[symmetrical_symbol].extend(symmetrical_group)
+
+            # get shieldings
+            all_shieldings = properties[property_name]
+
+            # iterate through requested elements
+            molecule_shifts = []
+            molecule_labels = []
+            for symbol_of_interest,(slope,intercept) in scaling_factors.items():
+                # sanity checks
+                assert isinstance(slope,float), f"expected slope to be float, but got {str(type(slope))}"
+                assert slope != 0, "zero slope not allowed"
+                assert isinstance(intercept,float), f"expected intercept to be float, but got {str(type(intercept))}"
+
+                # determine unique atoms
+                unique_atom_numbers_list = []
+                for atomic_symbol,atom_number in zip(atomic_symbols,atom_numbers):
+                    if atomic_symbol != symbol_of_interest:
+                        continue
+                    if symbol_of_interest in symmetrical_groups_dict2:
+                        if atom_number in symmetrical_groups_dict2[symbol_of_interest]:
+                            continue
+                    unique_atom_numbers_list.append(atom_number)
+
+                # extract relevant shieldings and labels for unique atoms
+                if len(unique_atom_numbers_list) > 0:
+                    selected_shieldings = list(all_shieldings[unique_atom_numbers_list])
+                    selected_labels = list(all_labels[unique_atom_numbers_list])
+                else:
+                    selected_shieldings = []
+                    selected_labels = []
+
+                # extract relevant shieldings and labels for symmetrical groups
+                symmetrical_groups = []
+                if symbol_of_interest in symmetrical_groups_dict:
+                    symmetrical_groups = symmetrical_groups_dict[symbol_of_interest]
+                for symmetrical_group in symmetrical_groups:
+                    first_atom_number = symmetrical_group[0]
+                    current_atomic_symbol = atomic_symbols[first_atom_number]
+                    if current_atomic_symbol == symbol_of_interest:
+                        group_shieldings = all_shieldings[symmetrical_group]
+                        averaged_shielding = group_shieldings.mean()
+                        selected_shieldings.append(averaged_shielding)
+                        label = f"{current_atomic_symbol}"
+                        for j,atom_number in enumerate(symmetrical_group):
+                            label += f"{atom_number}"
+                            if j < len(symmetrical_group) - 1:
+                                label += "/"
+                        selected_labels.append(label)
+
+                # apply scaling
+                assert len(selected_shieldings) == len(selected_labels), "shieldings and labels should have 1:1 correspondence"
+                selected_shifts = np.array(selected_shieldings)
+                selected_shifts = (intercept-selected_shifts)/slope
+                selected_labels = np.array(selected_labels)
+
+                # update results
+                molecule_shifts.extend(selected_shifts)
+                molecule_labels.extend(selected_labels)
+
+            # update master results if appropriate
+            if len(molecule_shifts) > 0:
+                all_scaled_shifts.append(molecule_shifts)
+                all_shift_labels.append(molecule_labels)
+            else:
+                # assume this means a bug
+                raise ValueError("no relevant shieldings were extracted for this molecule!")
+        else:
+            # there are no shieldings available, so append None
+            all_scaled_shifts.append(None)
+            all_shift_labels.append(None)
+
+    # return result
+    scaled_shifts = np.array(all_scaled_shifts)
+    shift_labels = np.array(all_shift_labels)
+    return scaled_shifts, shift_labels
+
+def compute_chirality(v1, v2, v3, v4):
+    """
+    Given 4 bond vectors, returns 1 or -1 based on chirality.
+    For proper Cahn–Ingold–Prelog results, vectors should be passed from highest to lowest priority; however, any predictable order will give meaningful results.
+    The vectors are rotated so that v4 lies along +x, then rotated about v4 to bring v1's yz-projection toward +z; the sign of v2's y-component is the answer.
+
+    Args:
+        v1, v2, v3 (np.ndarray): 3D bond vectors
+        v4 (np.ndarray): 3D bond vector (the one that gets rotated onto the +x axis)
+
+    Returns:
+        value of 1 (R by CIP) or -1 (S by CIP); ``AssertionError`` is raised if an input is not a 3-element np.ndarray
+        or if one of the internal alignment checks (1 degree tolerance) fails
+    """
+    assert (isinstance(v1, np.ndarray) and len(v1) == 3), "v1 needs to be a 3-element np.ndarray!"
+    assert (isinstance(v2, np.ndarray) and len(v2) == 3), "v2 needs to be a 3-element np.ndarray!"
+    assert (isinstance(v3, np.ndarray) and len(v3) == 3), "v3 needs to be a 3-element np.ndarray!"
+    assert (isinstance(v4, np.ndarray) and len(v4) == 3), "v4 needs to be a 3-element np.ndarray!"
+
+    e1 = np.array([1, 0, 0])
+    e3 = np.array([0, 0, 1])
+
+    # rotate v4 so that it's pointing back! (onto e1, about the axis perpendicular to both v4 and e1)
+    axis1 = np.cross(v4, e1)
+    theta1 = compute_angle_between(e1, v4)
+    # NOTE(review): if v4 is exactly antiparallel to e1, axis1 is the zero vector and R1 looks degenerate -- confirm
+    R1 = compute_rotation_matrix(axis1, theta1)
+    v1 = R1 @ v1
+    v2 = R1 @ v2
+    v3 = R1 @ v3
+    v4 = R1 @ v4
+
+    assert 1.0 > compute_angle_between(v4, e1), "rotating v4 failed"
+
+    # rotate v1 so that it's pointing up! (about v4; the sign of v1's y-component picks the direction)
+    axis2 = v4
+    theta2 = compute_angle_between(np.array([0, v1[1], v1[2]]), e3) # projection of v1 onto e2•e3 plane
+
+    R2 = compute_rotation_matrix(axis2, np.sign(v1[1]) * theta2)
+    v1 = R2 @ v1
+    v2 = R2 @ v2
+    v3 = R2 @ v3
+    v4 = R2 @ v4
+
+    assert 1.0 > compute_angle_between(v4, e1), f"rotating v4 failed - 1.0 ≤ {compute_angle_between(v4, e1)}"
+    assert 1.0 > compute_angle_between(e3, np.array([0, 0, v1[2]])), f"rotating v1 failed, - 1.0 ≤ {compute_angle_between(e3, np.array([0, 0, v1[2]]))}"
+
+    answer = np.sign(v2[1])
+    assert np.sign(v3[1]) != answer, "at this point our two substituents are on the same side of the plane that's supposed to divide them"
+    return answer
+
+# constants for calculating entropy
+ENTROPY_FACTOR_1 = 1.43877695998381562 # 2.99792458E10 * 6.62606957E-34 / 1.3806488E-23
+ENTROPY_FACTOR_2 = 1.9872041348        # 8.3144621 / 4.184
+
+def get_entropy(frequencies, temperature):
+    """
+        Sums the harmonic-oscillator entropy terms for a set of vibrational frequencies.
+
+        Args:
+            frequencies (list): in cm-1
+            temperature (float): in K
+
+        Returns:
+            entropy (float): the sum of the per-frequency terms (each scaled by ``ENTROPY_FACTOR_2``), divided by 627.509469
+    """
+    scale = ENTROPY_FACTOR_1 / temperature
+    total = 0.0
+    for frequency in frequencies:
+        x = scale * frequency
+        # per-mode term: x/(e^x - 1) - ln(1 - e^-x)
+        term = x * 1.0/(math.exp(x)-1.0) - math.log(1.0-math.exp(-x))
+        total += term * ENTROPY_FACTOR_2
+    return total / 627.509469
+
+def get_corrected_free_energy(free_energy, frequencies, frequency_cutoff=100.0, temperature=298.15):
+    """
+        Corrects a free energy by treating every positive frequency below ``frequency_cutoff``
+        as if it were at the cutoff.  See Cramer/Truhlar, J. Phys. Chem. B, 2011, 115, 14556.
+
+        Args:
+            free_energy (float): in hartree
+            frequencies (list): in cm-1
+            frequency_cutoff (float): in cm-1
+            temperature (float): in K
+
+        Returns:
+            corrected_free_energy (float): in hartree
+    """
+    # only positive frequencies strictly below the cutoff are affected
+    raised = [frequency for frequency in frequencies if 0 < frequency < frequency_cutoff]
+
+    # entropy of those modes as given, versus with each of them placed at the cutoff
+    entropy_as_given = get_entropy(raised, temperature)
+    entropy_at_cutoff = get_entropy([frequency_cutoff], temperature) * len(raised)
+
+    correction = (entropy_as_given - entropy_at_cutoff)*temperature/1000.0
+    return free_energy + correction
+
+def numpy_to_bytes(arr):
+    """ Serializes ``arr`` with ``np.save`` (pickling allowed) and returns the raw bytes """
+    # the buffer is only needed until its contents have been copied out
+    with BytesIO() as buffer:
+        np.save(buffer, arr, allow_pickle=True)
+        return buffer.getvalue()
+
+def bytes_to_numpy(arr_bytes):
+    """ Rebuilds a numpy array from bytes in ``np.save`` format (inverse of ``numpy_to_bytes``) """
+    # NOTE: allow_pickle=True can execute arbitrary code while loading -- only pass trusted bytes
+    stream = BytesIO(arr_bytes)
+    return np.load(stream, allow_pickle=True)
+
+def compute_mass_spectrum(formula_dict, **kwargs):
+    """
+    Computes the expected low-res mass spec ions for a given formula.
+
+    Args:
+        formula dict (dict): e.g. {"C": 6, "H": 6}; keys may be element symbols or integer atomic numbers
+        **kwargs: passed on to ``_recurse_through_formula`` (``cutoff``, ``mass_precision``, ``weight_precision``)
+
+    Returns:
+        array of m/z ions (rounded to 0.1)
+        array of relative weights (scaled so that the largest peak is 1)
+    """
+    # a wide integer type: np.int8 cannot hold more than 127 atoms of one element
+    form_vec = np.zeros(shape=92, dtype=np.int64)
+    for z, n in formula_dict.items():
+        if isinstance(z, str):
+            z = get_number(z)
+        assert isinstance(z, int), "atomic number must be integer"
+        form_vec[z] += n
+
+    # start from arrays (not lists) so that an empty formula still returns a valid spectrum
+    masses, weights = _recurse_through_formula(form_vec, np.array([0.0]), np.array([1.0]), **kwargs)
+
+    # merge peaks that coincide at 0.1 resolution, then normalize to the tallest peak
+    new_masses, indices = np.unique(np.round(masses, decimals=1), return_inverse=True)
+    new_weights = np.bincount(indices, weights=weights, minlength=len(new_masses))
+    return new_masses, new_weights / np.max(new_weights)
+
+def _recurse_through_formula(formula, masses, weights, cutoff=0.0000001, mass_precision=4, weight_precision=8):
+    """
+    Generates the m/z isotopic pattern for a formula by adding its atoms one at a time.
+    The atoms are processed iteratively, so large formulas cannot hit Python's recursion limit.
+
+    To prevent blowup of memory, fragments with very low abundance are ignored. Masses and weights are also rounded after every step.
+    To prevent error accumulation, internal precisions several orders of magnitude lower than the precision of interest should be employed.
+    The default values should work nicely for low-res MS applications.
+
+    Args:
+        formula (np.ndarray of ints): number of atoms of each element, indexed by atomic number (index 0 is unused). Not modified.
+        masses (array-like): list of mass fragments to start from
+        weights (array-like): relative weights of those fragments
+        cutoff (float): abundance threshold; fragments whose weight is not above ``cutoff`` are dropped after every atom
+        mass_precision (int): number of decimal places to store for mass
+        weight_precision (int): number of decimal places to store for weight
+
+    Returns:
+        masses (np.ndarray): fragment masses, sorted in increasing order
+        weights (np.ndarray): the matching weights
+    """
+    # accept plain lists as well as arrays
+    masses = np.asarray(masses, dtype=float)
+    weights = np.asarray(weights, dtype=float)
+
+    # elements are handled in order of increasing atomic number, one atom at a time
+    for element in np.flatnonzero(formula):
+        e_masses, e_weights = get_isotopic_distribution(element)
+        # (a non-positive count contributes no atoms)
+        for _ in range(int(formula[element])):
+            # combinatorially add the new masses and weights to our current lists
+            new_masses = np.add.outer(masses, e_masses).ravel()
+            new_weights = np.multiply.outer(weights, e_weights).ravel()
+
+            # merge fragments whose rounded masses coincide, summing their weights
+            rounded = np.round(new_masses, decimals=mass_precision)
+            merged_masses, indices = np.unique(rounded, return_inverse=True)
+            merged_weights = np.bincount(indices, weights=new_weights, minlength=len(merged_masses))
+            merged_weights = np.round(merged_weights, decimals=weight_precision)
+
+            # prune the low-abundance fragments before adding the next atom
+            above_cutoff = merged_weights > cutoff
+            masses, weights = merged_masses[above_cutoff], merged_weights[above_cutoff]
+
+    # report the fragments from lightest to heaviest
+    order = np.argsort(masses)
+    return masses[order], weights[order]
+
+def formula_dict_from_string(formula_string):
+    """
+    Parses a molecular formula string into a dictionary of element counts.
+
+    Args:
+        formula_string (str): the formula as a string, e.g. C10H12N2O1. every element needs an explicit count, including "1"
+
+    Returns:
+        formula_dict (dict): e.g. {'C': 10, 'H': 12, 'N': 2, 'O': 1}
+    """
+    return {symbol: int(count) for symbol, count in re.findall(r"([a-z]+)([0-9]+)", formula_string, re.I)}
diff --git a/build/lib/cctk/lines.py b/build/lib/cctk/lines.py
new file mode 100644
index 0000000..417945d
--- /dev/null
+++ b/build/lib/cctk/lines.py
@@ -0,0 +1,163 @@
+import re
+from itertools import islice
+
+class LazyLineObject:
+    """
+    Instead of storing ``lines`` as an array, this object can be used.
+    It reduces the memory usage drastically! It looks up lines only when needed.
+
+    Attributes:
+        file (str): path of the file to read; it is reopened on every access
+        start (int): zero-based index of the first line of the window
+        end (int): zero-based index of the last line yielded by iteration (note ``__len__`` reports ``end - start``)
+    """
+    def __init__(self, file, start, end):
+        self.file = file
+        self.start = start
+        self.end = end
+
+    def __len__(self):
+        return self.end - self.start
+
+    def __repr__(self):
+        return f"LazyLineObject for file {self.file}, lines {self.start}-{self.end}"
+
+    __str__ = __repr__  # str() and repr() intentionally produce the same text
+
+    def __iter__(self):
+        # islice stops before index ``end + 1``, so line ``end`` is included; only the trailing "\n" is removed
+        with open(self.file, "r") as lines:
+            for line in islice(lines, self.start, self.end + 1):
+                yield line.rstrip("\n")
+
+    def __getitem__(self, key):
+        # NOTE(review): raises KeyError rather than IndexError, and strips all trailing whitespace unlike __iter__
+        if key >= len(self):
+            raise KeyError("key too big")
+        with open(self.file, "r") as lines:
+            for line in islice(lines, self.start + key, self.start + key + 1):
+                return line.rstrip()
+
+    def full_text(self):
+        """Return the lines ``start`` through ``end``, each right-stripped and followed by a newline."""
+        with open(self.file, "r") as lines:
+            return "".join(line.rstrip() + "\n" for line in islice(lines, self.start, self.end + 1))
+
+    def search_for_block(self, start, end, count=1, join=" ", max_len=1000, format_line=None):
+        """
+        Search through a file (lines) and locate a block starting with "start" (inclusive) and ending with "end" (exclusive).
+
+        Args:
+            start (str): a pattern that matches the start of the block (can contain special characters)
+            end (str): a pattern that matches the end of the block (can contain special characters) - ``None`` removes this, so a block only closes once it holds ``max_len`` lines
+            count (int): how many matches to search for
+            join (str): spacer between lines
+            max_len (int): maximum length of matches (to prevent overflow)
+            format_line (function): function to perform to each line before adding to match (e.g. remove leading space)
+
+        Returns:
+            a single match (str) if count == 1 or a list of matches (str) if count > 1; entries for blocks that never closed stay ``None``.
+        """
+        assert isinstance(count, int), "count needs to be an integer"
+        assert isinstance(max_len, int), "max_len needs to be an integer"
+        assert isinstance(join, str), "join needs to be a string"
+
+        if count == 0:
+            return None
+
+        current_match = ""
+        current_len = 0
+        match = [None] * count
+
+        #### we want a regex that will never match anything - and quickly - so trying to match something before the start of the line works
+        if end is None:
+            end = "a^"
+
+        start_pattern = re.compile(start)
+        end_pattern = re.compile(end)
+
+        index = 0
+        for line in self:
+            if current_match:
+                if end_pattern.search(line) or current_len >= max_len:
+                    match[index] = current_match
+                    current_match = None
+                    index += 1
+                    current_len = 0
+
+                    if index == count:
+                        break
+                else:
+                    if format_line is not None:
+                        current_match = current_match + join + format_line(line.lstrip())
+                    else:
+                        current_match = current_match + join + line.lstrip()
+                    current_len += 1
+            else:
+                if start_pattern.search(line):
+                    if format_line is not None:
+                        current_match = format_line(line.lstrip())
+                    else:
+                        current_match = line.lstrip()
+                    current_len = 1
+
+        return match[0] if count == 1 else match
+
+    def find_parameter(self, parameter, expected_length, which_field, split_on=None, cast_to_float=True):
+        """
+        Extract the requested fields from every line that matches ``parameter`` and has ``expected_length`` fields.
+
+        Args:
+            parameter (string): regex to search for; a ``ValueError`` is raised if it cannot be compiled
+            expected_length (int): how many fields there should be
+            which_field (int or list): which field(s) the parameter is (zero-indexed)
+            split_on (str): additional non-space field on which to split
+            cast_to_float (Bool): whether or not to cast extracted value to float
+        Returns:
+            a list of all the extracted values
+        """
+        if not isinstance(which_field, list):
+            which_field = [which_field]
+
+        if not isinstance(expected_length, int):
+            raise TypeError("expected_length must be type int!")
+
+        for n in which_field:
+            if not isinstance(n, int):
+                raise TypeError("which_field must be type int!")
+            if n >= expected_length:
+                raise ValueError("can't expect a field after the last field!")
+
+        try:
+            pattern = re.compile(parameter)
+        except Exception as e:
+            raise ValueError(f"pattern {parameter} cannot be compiled as a regex; try again!") from e
+
+        matches = []
+        for line in self:
+            if pattern.search(line):
+                fields = re.split(" +", line)
+                if split_on:
+                    fields2 = []
+                    for field in fields:
+                        fields2 = fields2 + field.split(split_on)
+                    fields = fields2
+                fields = list(filter(None, fields))
+
+                if len(fields) == expected_length:
+                    desired_fields = []
+                    for n in which_field:
+                        if cast_to_float:
+                            try:
+                                desired_fields.append(float(fields[n]))
+                            except (ValueError, IndexError):
+                                # values that cannot be read as a float are recorded as 0 instead of aborting the scan
+                                desired_fields.append(0)
+                        else:
+                            desired_fields.append(fields[n])
+                    if len(desired_fields) == 1:
+                        matches.append(desired_fields[0])
+                    else:
+                        matches.append(desired_fields)
+        return matches
+
diff --git a/build/lib/cctk/load_groups.py b/build/lib/cctk/load_groups.py
new file mode 100644
index 0000000..d2c3db3
--- /dev/null
+++ b/build/lib/cctk/load_groups.py
@@ -0,0 +1,109 @@
+try:
+    import importlib.resources as pkg_resources
+except ImportError:
+    import importlib_resources as pkg_resources
+
+from cctk import MOL2File, Group
+from . import groups
+
+filenames = [  # packaged .mol2 resource for each predefined group; position i pairs with names[i] and isomorphic[i]
+    "MeH.mol2",
+    "EtH.mol2",
+    "iPrH.mol2",
+    "tBuH.mol2",
+    "OH2.mol2",
+    "OMeH.mol2",
+    "NHAcH.mol2",
+    "NH3.mol2",
+    "NMe2H.mol2",
+    "CF3H.mol2",
+    "HCN.mol2",
+    "HNO2.mol2",
+    "HCO2Me.mol2",
+    "FH.mol2",
+    "ClH.mol2",
+    "BrH.mol2",
+    "IH.mol2",
+    "SF5H.mol2",
+    "SO3HH.mol2",
+    "AcH.mol2",
+    "CHOH.mol2",
+]
+
+names = [  # accepted aliases for each group, in the same order as filenames; load_group matches any alias in a row
+    ["methyl", "Me", "CH3",],
+    ["ethyl", "Et", "C2H5",],
+    ["isopropyl", "iPr", "iC3H7",],
+    ["tert-butyl", "tBu", "tC4H9",],
+    ["hydroxy", "OH",],
+    ["methoxy", "MeO", "OMe", "CH3O",],
+    ["acetamido", "NHAc",],
+    ["amino", "NH2",],
+    ["dimethylamino", "Me2N", "NMe2",],
+    ["trifluoromethyl", "CF3",],
+    ["cyano", "CN",],
+    ["nitro", "NO2",],
+    ["carboxylmethyl", "MeO2C", "CO2Me",],
+    ["fluoro", "F",],
+    ["chloro", "Cl",],
+    ["bromo", "Br",],
+    ["iodo", "I",],
+    ["pentafluorosulfanyl", "SF5",],
+    ["sulfonyl", "SO3H",],
+    ["acetyl", "Ac", "COMe",],
+    ["formyl", "CHO",],
+]
+
+isomorphic = [  # per-group value handed to Group.new_from_molecule(isomorphic=...); None where nothing is specified
+    [[3, 4, 5]],
+    None,
+    [[4, 8], [9, 10, 11, 5, 6, 7]],
+    [[3, 7, 11], [4, 5, 6, 8, 9, 10, 12, 13, 14]],
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+]
+
+def load_group(name):
+    """
+    Build the predefined ``Group`` whose alias row in ``names`` contains ``name``.
+    """
+    filename, iso = None, None
+    for aliases, candidate_file, candidate_iso in zip(names, filenames, isomorphic):
+        if name in aliases:
+            filename, iso = candidate_file, candidate_iso
+            break
+
+    assert filename is not None, f"can't find name {name}!"
+
+    with pkg_resources.path(groups, filename) as file:
+        # read the packaged template and take the first molecule of its ensemble
+        mol = MOL2File.read_file(file).ensemble.molecules[0]
+        mol.assign_connectivity()
+
+        #### every molecule is set so you need to attach to atom 2
+        return Group.new_from_molecule(attach_to=2, molecule=mol, isomorphic=iso)
+
+def group_iterator(symmetric_only=False):
+    """
+    Yields every *cctk*-predefined group in turn; ``symmetric_only`` skips groups whose ``isomorphic`` entry is None.
+    """
+    for aliases, symmetry in zip(names, isomorphic):
+        if symmetric_only and symmetry is None:
+            continue
+        # each group is looked up by the first alias in its row
+        yield load_group(aliases[0])
diff --git a/build/lib/cctk/mae_file.py b/build/lib/cctk/mae_file.py
new file mode 100644
index 0000000..09d3e08
--- /dev/null
+++ b/build/lib/cctk/mae_file.py
@@ -0,0 +1,278 @@
+import re
+import numpy as np
+import networkx as nx
+
+from cctk import File, Ensemble, ConformationalEnsemble, Molecule
+from cctk.helper_functions import get_number
+
+
+class MAEFile(File):
+    """
+    Class representing Maestro ``.mae`` files.
+
+    Attributes:
+        name (str): name of file
+        ensemble (Ensemble): ``Ensemble`` or ``ConformationalEnsemble`` object
+    """
+
+    def __init__(self, name=None):
+        if isinstance(name, str):  # non-string values (including the default None) are ignored
+            self.name = name
+
+    @classmethod
+    def read_file(cls, filename, name=None, **kwargs):
+        """
+        Reads ``.mae`` file and generates a ``MAEFile`` instance.
+
+        Args:
+            filename (str): path to file
+            name (str): name of the file
+            **kwargs: forwarded unchanged to ``_read_mae``
+
+        Returns:
+            MAEFile object
+            property names (list)
+            property_values (list)
+        """
+        file = MAEFile(name=name)
+
+        (geometries, symbols, bonds, p_names, p_vals, conformers) = cls._read_mae(filename, **kwargs)
+        file.ensemble = ConformationalEnsemble() if conformers else Ensemble()
+
+        # _read_mae hands back one shared symbol list and graph when it keeps a single copy for
+        # conformers, and one per geometry otherwise; handle both instead of assuming the shared form
+        shared = isinstance(bonds, nx.Graph)
+        for idx, geom in enumerate(geometries):
+            mol_symbols, mol_bonds = (symbols, bonds) if shared else (symbols[idx], bonds[idx])
+            atomic_numbers = np.array([get_number(z) for z in mol_symbols], dtype=np.int8)
+            file.ensemble.add_molecule(Molecule(atomic_numbers, geom, bonds=mol_bonds.edges))
+
+        return file, p_names, p_vals
+
+    @classmethod
+    def _read_mae(
+        cls, filename, contains_conformers="check", save_memory_for_conformers=True, print_status_messages=False,
+    ):
+        """
+        Reads uncompressed Macromodel files.
+
+        Args:
+            filename (str): path to file
+            contains_conformers (str or bool): ``"check"`` to compare the structures, or ``True``/``False`` to force the answer
+            save_memory_for_conformers (Bool): for conformers, return only the first structure's symbols and bonds
+            print_status_messages (Bool): whether to print progress and a summary to stdout
+
+        Returns:
+            geometries (np.ndarray): array of 3-tuples of geometries
+            symbols (np.ndarray): array of atom symbols (str)
+            bonds (nx.Graph or list): one graph when a single shared copy is kept, otherwise one graph per geometry
+            property_names (np.ndarray): per-structure property-name lines read before ``i_m_ct_format``
+            property_values (np.ndarray): per-structure raw lines read after the ``:::`` that follows the names
+            contains_conformers (Bool): whether or not the file contains conformers
+
+        Raises:
+            ValueError: for malformed property or bond lines, or if ``contains_conformers`` is neither "check" nor a bool
+        """
+        # read file
+        if print_status_messages:
+            print(f"Reading {filename}...", end="", flush=True)
+        lines = super().read_file(filename)
+        if print_status_messages:
+            print(f"read {len(lines)} lines...", end="", flush=True)
+
+        # initialize arrays
+        geometries = []
+        symbols = []
+        bonds = []
+        property_names = []
+        property_values = []
+        this_geometry = None
+        this_symbols = None
+        this_bonds = None
+        this_property_names = None
+        this_property_values = None
+
+        # parse file
+        i = 0
+        current_block_type = None
+        while i < len(lines):
+            # read the current line
+            line = lines[i]
+            i += 1
+
+            # determine if we are in a molecule block
+            # (i has already been advanced, so end_of_file is set while the second-to-last line is being processed)
+            end_of_file = i + 1 == len(lines)
+            if current_block_type is None and (line.startswith("f_m_ct") or end_of_file):
+                # store the current results if any
+                if this_geometry is not None and len(this_geometry) > 0:
+                    geometries.append(this_geometry)
+                    symbols.append(this_symbols)
+                    bonds.append(this_bonds)
+                    property_names.append(this_property_names)
+                    property_values.append(this_property_values)
+
+                # prepare to read a new molecule
+                current_block_type = "property_names"
+                this_geometry = []
+                this_symbols = []
+                this_bonds = None
+                this_property_names = []
+                this_property_values = []
+                continue
+
+            # read property names
+            elif current_block_type == "property_names":
+                line = line.strip()
+                if line.startswith("i_m_ct_format"):
+                    next_line = lines[i].strip()
+                    if next_line != ":::":
+                        raise ValueError(f"expected ':::' here but line {i+1} is:\n{next_line}\n")
+                    current_block_type = "property_values"
+                    i += 1
+                elif line.startswith(":::"):
+                    raise ValueError(f"expected to see i_m_ct_format as the last property (line {i+1})")
+                else:
+                    fields = re.split(" +", line)
+                    if len(fields) != 1:
+                        raise ValueError(f"unexpected number of fields in property name line: {line}")
+                    this_property_names.append(line)
+
+            # read property values
+            elif current_block_type == "property_values":
+                # NOTE(review): the line already consumed as ``line`` is not stored, so the values kept start one
+                # line after the first line following ":::" -- confirm this pairing with the property names is intended
+                n_properties = len(this_property_names)
+                for j in range(n_properties):
+                    this_property_values.append(lines[i + j])
+                i += n_properties
+                current_block_type = "looking_for_geometry1"
+
+            # look for geometry block
+            elif current_block_type == "looking_for_geometry1":
+                if line.startswith("  m_atom"):
+                    current_block_type = "looking_for_geometry2"
+            elif current_block_type == "looking_for_geometry2":
+                if line.strip() == ":::":
+                    current_block_type = "geometry_block"
+
+            # parse geometry
+            elif current_block_type == "geometry_block":
+                line = line.strip()
+                if line == ":::":
+                    current_block_type = "bond_block"
+
+                    # initialize bond connectivity graph
+                    this_bonds = nx.Graph()
+                    n_atoms = len(this_symbols)
+                    this_bonds.add_nodes_from(range(1, n_atoms + 1))
+                    i += 7  # skips the next 7 lines; presumably the bond-block header -- TODO confirm against the format
+                else:
+                    fields = re.split(" +", line)
+                    x, y, z = float(fields[2]), float(fields[3]), float(fields[4])
+                    this_geometry.append((x, y, z))
+                    symbol = fields[-1]
+                    this_symbols.append(symbol)
+
+            # parse bonds
+            elif current_block_type == "bond_block":
+                line = line.strip()
+                if line == ":::":
+                    current_block_type = None
+                else:
+                    fields = re.split(" +", line)
+                    bond_number, atom1, atom2, bond_order = int(fields[0]), int(fields[1]), int(fields[2]), int(fields[3])
+                    n_atoms = len(this_geometry)
+                    if not 1 <= atom1 <= n_atoms or not 1 <= atom2 <= n_atoms:
+                        raise ValueError(f"atom number out of range: {line}")
+                    if bond_order <= 0:
+                        raise ValueError(f"zero or negative bond order: {line}")
+                    # number_of_edges() counts distinct atom pairs, so a repeated pair makes later numbers look non-sequential
+                    if this_bonds.number_of_edges() != bond_number - 1:
+                        raise ValueError(f"non-sequential bond number (expected {this_bonds.number_of_edges()+1} but got {bond_number})")
+                    if this_bonds.has_edge(atom1, atom2):
+                        current_bond_order = this_bonds[atom1][atom2]["weight"]
+                        if current_bond_order != bond_order:
+                            raise ValueError(f"inconsistent bond order definition: {line}")
+                    # nx.Graph is undirected, so a single add_edge call records the bond for both atoms
+                    this_bonds.add_edge(atom1, atom2, weight=bond_order)
+
+        # a molecule whose bond block closed on the last two lines of the file never reaches the store
+        # step inside the loop, so it is still pending here; keep it instead of silently dropping it
+        if current_block_type is None and this_geometry:
+            geometries.append(this_geometry)
+            symbols.append(this_symbols)
+            bonds.append(this_bonds)
+            property_names.append(this_property_names)
+            property_values.append(this_property_values)
+
+        # convert to numpy array
+        geometries = np.array(geometries)
+        symbols = np.array(symbols)
+        property_names = np.array(property_names)
+        property_values = np.array(property_values)
+
+        # determine if these are conformers
+        if contains_conformers == "check":
+            contains_conformers = True
+            for this_symbols, this_bonds in zip(symbols[1:], bonds[1:]):
+                # must have the same symbols and bonds
+                if not (symbols[0] == this_symbols).all() or not nx.is_isomorphic(bonds[0], this_bonds):
+                    contains_conformers = False
+                    break
+        elif isinstance(contains_conformers, bool):
+            pass
+        else:
+            raise ValueError("contains_conformers must be 'check' or boolean")
+
+        # if requested, just store one copy of symbols and bonds
+        # (skipped when nothing was read, so an empty file gives empty results instead of an IndexError)
+        if save_memory_for_conformers and contains_conformers and len(geometries) > 0:
+            symbols = symbols[0]
+            bonds = bonds[0]
+
+        # report what was read
+        n_geometries = len(geometries)
+        if print_status_messages:
+            # bonds is a single graph when one shared copy is kept and a list of graphs otherwise;
+            # normalise to a list so every branch below can treat it the same way
+            graphs = [bonds] if isinstance(bonds, nx.Graph) else bonds
+            if n_geometries > 1 and contains_conformers:
+                n_atoms = len(geometries[0])
+                n_bonds = graphs[0].number_of_edges()
+                print(f"read {n_geometries} conformers ({n_atoms} atoms and {n_bonds} bonds).")
+            elif n_geometries > 1:
+                atom_counts = [len(geometry) for geometry in geometries]
+                bond_counts = [graph.number_of_edges() for graph in graphs]
+                print(f"read {n_geometries} unrelated geometries ({min(atom_counts)}-{max(atom_counts)} atoms and {min(bond_counts)}-{max(bond_counts)} bonds).")
+            elif n_geometries == 1:
+                n_atoms = len(geometries[0])
+                n_bonds = graphs[0].number_of_edges()
+                print(f"read one geometry ({n_atoms} atoms and {n_bonds} bonds).")
+            else:
+                print("read no geometries.")
+
+        # return result
+        return (
+            geometries,
+            symbols,
+            bonds,
+            property_names,
+            property_values,
+            contains_conformers,
+        )
+
+    def get_molecule(self, num=None):
+        """
+        Returns the last molecule from the ensemble.
+
+        If ``num`` is specified, returns ``self.ensemble.molecules[num]``
+        """
+        # some methods pass num=None, which overrides setting the default above
+        if num is None:
+            num = -1
+
+        if not isinstance(num, int):
+            raise TypeError("num must be int")
+
+        return self.ensemble.molecules[num]
diff --git a/build/lib/cctk/mol2_file.py b/build/lib/cctk/mol2_file.py
new file mode 100644
index 0000000..44dddea
--- /dev/null
+++ b/build/lib/cctk/mol2_file.py
@@ -0,0 +1,351 @@
+import re
+import numpy as np
+import networkx as nx
+
+from cctk import File, Ensemble, ConformationalEnsemble, Molecule
+from cctk.helper_functions import get_symbol, get_number
+
+
+class MOL2File(File):
+    """
+    Class representing SYBYL ``.mol2`` files.
+
+    Attributes:
+        name (str): name of file
+        ensemble (Ensemble): ``Ensemble`` or ``ConformationalEnsemble`` object
+    """
+
+    def __init__(self, name=None):
+        if isinstance(name, str):  # non-string values (including the default None) are ignored
+            self.name = name
+
+    @classmethod
+    def read_file(cls, filename, name=None, **kwargs):
+        """
+        Reads ``.mol2`` file and generates a ``MOL2File`` instance.
+
+        Conformers are stored in a ``ConformationalEnsemble`` and share the first structure's atom types and
+        bonds; otherwise every geometry is paired with its own atom types and bonds in an ``Ensemble``.
+
+        Args:
+            filename (str): path to file
+            name (str): name of the file
+            **kwargs: forwarded unchanged to ``_read_mol2``
+
+        Returns:
+            MOL2File object
+        """
+
+        def to_atomic_numbers(atom_types):
+            # turn atom-type strings into atomic numbers: keep the part before the first ".",
+            # drop every non-letter character, and look the remaining symbol up with get_number
+            numbers = []
+            for atom_type in atom_types:
+                assert isinstance(atom_type,str), f"unexpected atom_type type: {type(atom_type)} / {atom_type}"
+                symbol = re.sub("[^A-Za-z]", "", atom_type.split(".")[0])
+                numbers.append(get_number(symbol))
+            return np.asarray(numbers, dtype=np.int8)
+
+        file = MOL2File(name=name)
+
+        (geometries, all_clean_symbols, all_symbols, all_bonds, conformers) = cls._read_mol2(filename, **kwargs)
+        assert len(all_bonds) == len(geometries)
+        for bonds, geometry in zip(all_bonds, geometries):
+            assert isinstance(bonds, nx.Graph)
+            # each graph is compared with its own geometry, since unrelated
+            # structures in one file do not have to contain the same number of atoms
+            assert len(bonds) == len(geometry)
+
+        if conformers:
+            # conformers share one set of atom types and one bond graph (the first of each)
+            atomic_numbers = to_atomic_numbers(all_symbols[0])
+            file.ensemble = ConformationalEnsemble()
+            for geometry in geometries:
+                molecule = Molecule(atomic_numbers, geometry, bonds=all_bonds[0].edges, checks=False)
+                file.ensemble.add_molecule(molecule, checks=False)
+        else:
+            file.ensemble = Ensemble()
+            # every geometry gets its own atom types and its own bond graph; the graph must come
+            # from all_bonds here rather than from the variable left over by the checks above
+            for this_symbols, geometry, bonds in zip(all_symbols, geometries, all_bonds):
+                atomic_numbers = to_atomic_numbers(this_symbols)
+                molecule = Molecule(atomic_numbers, geometry, bonds=bonds.edges)
+                file.ensemble.add_molecule(molecule)
+
+        return file
+
+    @classmethod
+    def _read_mol2(
+        cls, filename, contains_conformers="check", save_memory_for_conformers=True, print_status_messages=False,
+    ):
+        """
+        Reads .mol2 files into cctk.
+
+        Args:
+            filename (str): the name of the .mol2 file
+
+            contains_conformers('check' or bool): if set to 'check', multiple geometries
+                                                in the same file will be compared to see
+                                                if they are conformers.  Alternatively,
+                                                force the geometries to be treated as
+                                                conformers (True) or not (False).  This
+                                                latter option increases performance,
+                                                particularly for large files.
+
+            save_memory_for_conformers (bool): currently unused by this method.
+
+            print_status_messages (bool): if True, update the progress of the parsing operation to stdout.
+
+        Returns:
+            all_geometries, all_clean_symbols, all_symbols, all_bonds, contains_conformers
+
+            all_geometries: np.ndarray(geometry number, atom number, xyz) -> position (float)
+            all_clean_symbols: np.ndarray(geometry number, atom number) -> atom symbol (:obj:`str`)
+            all_symbols: np.ndarray(geometry number, atom number) -> atom symbol (:obj:`str`)
+            all_bonds: list(geometry_number) -> bond connectivity (:obj:`nx.Graph`)
+            contains_conformers: bool (True if the geometries correspond to conformers.)
+
+        Raises:
+            ValueError: if a bond table appears before any atoms, or ``contains_conformers`` is not 'check' or a bool.
+        """
+        # read file
+        if print_status_messages:
+            print(f"Reading {filename}...", end="", flush=True)
+        lines = super().read_file(filename)
+        if print_status_messages:
+            print(f"read {len(lines)} lines...", end="", flush=True)
+
+        # initialize arrays
+        all_geometries = []
+        all_symbols = []
+        all_clean_symbols = []
+        all_bonds = []
+        this_geometry = []
+        this_symbols = []
+        this_clean_symbols = []
+        this_bonds = None
+
+        # parse file
+        i = 0
+        in_geometry_block = False
+        in_bond_block = False
+        bond_number = 0
+        while i < len(lines):
+            # read the current line
+            line = lines[i]
+
+            # determine if we are in a geometry block
+            if line.startswith("@<TRIPOS>ATOM"):
+                # step forward to the first geometry line
+                in_geometry_block = True
+                in_bond_block = False
+                i += 1
+                line = lines[i]
+                # when conformers are forced, later structures reuse the first structure's symbol lists
+                if contains_conformers == True and len(all_symbols) > 0:
+                    this_symbols = all_symbols[0]
+                    this_clean_symbols = all_clean_symbols[0]
+            elif line.startswith("@<TRIPOS>BOND"):
+                # update status
+                in_geometry_block = False
+                in_bond_block = True
+                bond_number = 0
+
+                # get next line
+                i += 1
+                line = lines[i]
+
+                # initialize connectivity graph
+                if len(this_geometry) == 0:
+                    raise ValueError("got to bond table without a geometry")
+                # forced conformers likewise share the first structure's bond graph
+                if contains_conformers == True and len(all_bonds) > 0:
+                    this_bonds = all_bonds[0]
+                else:
+                    this_bonds = nx.Graph()
+                    this_bonds.add_nodes_from(range(1, len(this_geometry) + 1))
+
+            # parse geometry if appropriate
+            if in_geometry_block:
+                fields = line.split()
+                # a short line inside the atom block stops parsing of the whole file; the geometry in progress is not stored
+                if len(fields) < 6:
+                    print("Error parsing file:")
+                    print("Line = '%s'" % line.strip())
+                    print(fields)
+                    break
+                x, y, z = float(fields[2]), float(fields[3]), float(fields[4])
+                this_geometry.append([x, y, z])
+                if contains_conformers != True or len(all_symbols)==0:
+                    # the sixth field is kept as the atom type and the second field as the "clean" symbol
+                    symbol = fields[5]
+                    clean_symbol = fields[1]
+                    this_symbols.append(symbol)
+                    this_clean_symbols.append(clean_symbol)
+            elif in_bond_block:
+                fields = line.split()
+                if len(fields) == 4 and (len(all_bonds)==0 or contains_conformers != True):
+                    # parse bonds, checking that the bonds are increasing
+                    try:
+                        this_bond_number = int(fields[0])
+                        atom1 = int(fields[1])
+                        atom2 = int(fields[2])
+                        n_atoms = len(this_geometry)
+                        if not 1 <= atom1 <= n_atoms or not 1 <= atom2 <= n_atoms:
+                            raise ValueError(f"atom number out of range: {line}")
+                        # aromatic bonds are recorded with order 1
+                        if fields[3] == "ar":
+                            bond_order = 1
+                        else:
+                            bond_order = int(fields[3])
+                        if bond_order <= 0:
+                            raise ValueError(f"zero or negative bond order: {line}")
+                        if this_bond_number != bond_number + 1:
+                            raise ValueError("non-sequential bond number")
+                        bond_number = this_bond_number
+                        if this_bonds.has_edge(atom1, atom2):
+                            current_bond_order = this_bonds[atom1][atom2]["weight"]
+                            if current_bond_order != bond_order:
+                                raise ValueError(f"inconsistent bond order definition: {line}")
+                        this_bonds.add_edge(atom1, atom2, weight=bond_order)
+                        this_bonds.add_edge(atom2, atom1, weight=bond_order)
+                    except Exception:
+                        # NOTE(review): this also swallows the ValueErrors raised above and any bond type that is
+                        # neither an integer nor "ar", so the bond block silently ends at the first such line
+                        # assume we have left the bond block
+                        in_geometry_block = False
+                        in_bond_block = False
+                else:
+                    # we have left the bond block
+                    in_geometry_block = False
+                    in_bond_block = False
+
+            # go to next line
+            i += 1
+
+            # store geometry and reinitialize if appropriate
+            end_of_file = i == len(lines)
+            end_of_blocks = not in_geometry_block and not in_bond_block
+            if (end_of_file or end_of_blocks) and len(this_geometry) > 0:
+                all_geometries.append(this_geometry)
+                all_clean_symbols.append(this_clean_symbols)
+                all_symbols.append(this_symbols)
+                all_bonds.append(this_bonds)
+                this_geometry = []
+                this_symbols = []
+                this_clean_symbols = []
+                this_bonds = None
+
+        # convert to numpy array
+        all_geometries = np.array(all_geometries)
+        all_symbols = np.array(all_symbols)
+        all_clean_symbols = np.array(all_clean_symbols)
+
+        # determine if these are conformers
+        if contains_conformers == "check":
+            contains_conformers = True
+            for symbols, bonds in zip(all_symbols[1:], all_bonds[1:]):
+                # must have the same symbols and bonds
+                if not (all_symbols[0] == symbols).all() or not nx.is_isomorphic(all_bonds[0], bonds):
+                    contains_conformers = False
+                    break
+        elif isinstance(contains_conformers, bool):
+            pass
+        else:
+            raise ValueError("contains_conformers must be 'check' or boolean")
+
+        # report what was read
+        n_geometries = len(all_geometries)
+        if print_status_messages:
+            if n_geometries > 1 and contains_conformers:
+                n_atoms = len(all_geometries[0])
+                n_bonds = all_bonds[0].number_of_edges()
+                print(f"read {n_geometries} conformers ({n_atoms} atoms and {n_bonds} bonds).")
+            elif n_geometries > 1:
+                # smallest and largest atom and bond counts over all structures
+                atom_counts = [len(geometry) for geometry in all_geometries]
+                bond_counts = [bonds.number_of_edges() for bonds in all_bonds]
+                print(f"read {n_geometries} unrelated geometries ({min(atom_counts)}-{max(atom_counts)} atoms and {min(bond_counts)}-{max(bond_counts)} bonds).")
+            elif n_geometries == 1:
+                # count the atoms of the single geometry, not the number of geometries
+                n_atoms = len(all_geometries[0])
+                n_bonds = all_bonds[0].number_of_edges()
+                print(f"read one geometry ({n_atoms} atoms and {n_bonds} bonds).")
+            else:
+                print("read no geometries.")
+
+        return (all_geometries, all_clean_symbols, all_symbols, all_bonds, contains_conformers)
+
+    def get_molecule(self, num=None):
+        """
+        Returns the last molecule from the ensemble.
+
+        If ``num`` (a Python or ``numpy`` integer) is specified, returns ``self.ensemble.molecules[num]``.
+        """
+        # callers may pass num=None explicitly, so the "last molecule" default is applied here
+        if num is None:
+            num = -1
+
+        if not isinstance(num, (int, np.integer)):
+            raise TypeError("num must be int")
+
+        return self.ensemble.molecules[num]
+
+    @classmethod
+    def write_molecule_to_file(cls, filename, molecule, title=None, append=False):
+        """
+        Write (or append) a single molecule to a ``.mol2`` file.
+
+        Args:
+            filename (str): path to the new file
+            molecule (Molecule): the ``Molecule`` object to write
+            title (str): title, written in the header comment and as the molecule name
+            append (Bool): if True, append to ``filename`` instead of writing it normally
+        """
+        assert isinstance(molecule, Molecule), "molecule is not a valid Molecule object!"
+
+        # collect the pieces in a list and join them once at the end
+        pieces = [f"# {title}\n#\n#\n\n#\n#\n\n"]
+        pieces.append(f"@<TRIPOS>MOLECULE\n{title}\n{molecule.num_atoms()} {molecule.bonds.number_of_edges()}\nSMALL\nNO_CHARGES\n\n\n")
+        pieces.append("@<TRIPOS>ATOM\n")
+        for idx, z in enumerate(molecule.atomic_numbers, start=1):
+            v = molecule.get_vector(idx)
+            pieces.append(f"{idx} {get_symbol(z)}{idx}    {v[0]: .4f}    {v[1]: .4f}    {v[2]: .4f} {get_symbol(z)} 0\n")
+        pieces.append("@<TRIPOS>BOND\n")
+        bond_records = enumerate(molecule.bonds.edges.data("weight", default=1), start=1)
+        for count, (atom1, atom2, weight) in bond_records:
+            pieces.append(f"{count} {atom1} {atom2} {weight}\n")
+        text = "".join(pieces)
+
+        # the actual file I/O is delegated to the parent class
+        writer = super().append_to_file if append else super().write_file
+        writer(filename, text)
+
+    def write_file(self, filename, molecule=-1, **kwargs):
+        """
+        Write a ``.mol2`` file, using object attributes. Extra keyword arguments are forwarded to ``write_molecule_to_file()``.
+
+        Args:
+            filename (str): path to the new file
+            molecule (int): which molecule to use -- passed to ``self.get_molecule()``.
+                Default is -1 (i.e. the last molecule, also used for ``None``); other integers select from self.ensemble.molecules (0-indexed).
+                A ``Molecule`` object can also be passed, in which case that molecule will be written to the file.
+        """
+        if molecule is None or isinstance(molecule, (np.integer, int)):
+            molecule = self.get_molecule(None if molecule is None else int(molecule))
+        self.write_molecule_to_file(filename, molecule, **kwargs)
+
+    @classmethod
+    def write_ensemble_to_file(cls, filename, ensemble):
+        """
+        Write every structure of ``ensemble`` into one mol2 file, in order.
+
+        Args:
+            filename (str): where to write the file
+            ensemble (Ensemble): ``Ensemble`` object to write
+        """
+        # the first structure is written with append=False,
+        # and every later structure is written with append=True
+        for idx, molecule in enumerate(ensemble.molecules):
+            is_first = idx == 0
+            cls.write_molecule_to_file(filename, molecule, append=not is_first)
diff --git a/build/lib/cctk/molecule.py b/build/lib/cctk/molecule.py
new file mode 100644
index 0000000..2ccd3d3
--- /dev/null
+++ b/build/lib/cctk/molecule.py
@@ -0,0 +1,1832 @@
+import math, copy, re
+import numpy as np
+import networkx as nx
+from scipy.spatial.distance import cdist
+import pkg_resources
+import yaml
+
+import cctk
+from cctk.helper_functions import (
+    get_symbol,
+    get_number,
+    compute_rotation_matrix,
+    compute_distance_between,
+    compute_angle_between,
+    compute_dihedral_between,
+    compute_unit_vector,
+    get_covalent_radius,
+    get_vdw_radius,
+    numpy_to_bytes,
+    bytes_to_numpy,
+    _recurse_through_formula,
+)
+import cctk.topology as top
+
+class Molecule:
+    """
+    Class representing a single molecular geometry.
+
+    In contrast to typical Python behavior, ``atomic_numbers`` and ``geometry`` are indexed from one, to simplify interfacing with computational chemistry programs.
+    This has been done by defining a custom wrapper for ``numpy.ndarray`` called ``cctk.OneIndexedArray``.
+
+    All other datatypes are indexed from 0.
+
+    Attributes:
+        name (str): for identification, optional
+        atomic_numbers (cctk.OneIndexedArray, dtype=np.int8): list of atomic numbers
+        geometry (cctk.OneIndexedArray): list of 3-tuples of xyz coordinates - same ordering as ``atomic_numbers``
+        bonds (nx.Graph or list of tuples): connectivity graph or list of 2-tuples, with each element representing the 1-indexed atom number of a bonded pair
+        charge (int): the charge of the molecule
+        multiplicity (int): the spin state of the molecule (1 corresponds to singlet, 2 to doublet, 3 to triplet, etc. -- so a multiplicity of 1 is equivalent to S=0)
+        vibrational_modes (list of cctk.VibrationalMode): vibrational modes
+    """
+
+    def __init__(self, atomic_numbers, geometry, name=None, bonds=None, charge=0, multiplicity=1, checks=True):
+        """
+        Create new Molecule object, validating the inputs and building the bond graph.
+
+        ``bonds`` may be an ``nx.Graph`` (stored as-is), a list/array/``EdgeView`` of 2-element edges (each added with weight 1), or ``None`` (no bonds).
+
+        If ``checks`` is True, every atom number appearing in an edge list is validated with ``_check_atom_number``.
+        This option can be disabled by setting ``checks`` to False, but this is not recommended for external data.
+        """
+        if len(atomic_numbers) != len(geometry):
+            raise ValueError(f"length of geometry ({len(geometry)}) and atomic_numbers ({len(atomic_numbers)}) does not match!\n{atomic_numbers}\n{geometry}")
+
+        try:
+            atomic_numbers = np.asarray(atomic_numbers, dtype=np.int8).view(cctk.OneIndexedArray)
+        except Exception as e:
+            raise ValueError("invalid atom list")
+
+        try:
+            geometry = np.array(geometry, dtype=np.float32).view(cctk.OneIndexedArray)
+        except Exception as e:
+            raise TypeError("geometry cannot be cast to ``np.ndarray`` of floats!")
+
+        if name is not None:
+            if not isinstance(name, str):
+                raise TypeError("name must be a string!")
+
+        if not isinstance(charge, int):
+            try:
+                charge = int(charge)
+            except Exception as e:
+                raise TypeError("charge must be integer or castable to integer!")
+
+        if not isinstance(multiplicity, int):
+            try:
+                multiplicity = int(multiplicity)
+            except Exception as e:
+                raise TypeError("multiplicity must be positive integer or castable to positive integer")
+        assert multiplicity > 0, "multiplicity must be positive"
+
+        self.atomic_numbers = atomic_numbers
+        self.geometry = geometry
+
+        self.name = name
+        self.multiplicity = multiplicity
+        self.charge = charge
+
+        self.vibrational_modes = list()
+
+        if isinstance(bonds, nx.Graph):
+            self.bonds = bonds
+        elif isinstance(bonds, (list,np.ndarray,nx.classes.reportviews.EdgeView)):
+            if checks:
+                # each distinct atom number is validated only once
+                known_atomic_numbers = set()
+                for bond in bonds:
+                    assert len(bond)==2, f"unexpected number of atoms in bond, expected 2, got {len(bond)}"
+                    if bond[0] not in known_atomic_numbers:
+                        self._check_atom_number(bond[0])
+                        known_atomic_numbers.add(bond[0])
+                    if bond[1] not in known_atomic_numbers:
+                        self._check_atom_number(bond[1])
+                        known_atomic_numbers.add(bond[1])
+
+            # every atom becomes a node (numbered from 1), whether or not it has any bonds
+            self.bonds = nx.Graph()
+            self.bonds.add_nodes_from(range(1, len(atomic_numbers) + 1))
+            self.bonds.add_edges_from(bonds, weight=1)
+        elif bonds is None:
+            self.bonds = nx.Graph()
+            self.bonds.add_nodes_from(range(1, len(atomic_numbers)+1))
+        else:
+            raise ValueError(f"unexpected type for bonds: {type(bonds)}")
+
+    def __str__(self):
+        # the name is only shown when one was assigned
+        n_atoms = len(self.atomic_numbers)
+        label = f"name={self.name}, " if self.name is not None else ""
+        return f"Molecule ({label}{n_atoms} atoms)"
+
+    def __repr__(self):
+        return self.__str__() # placeholder: the repr is simply the informal string for now
+
+#    def __eq__(self, other):
+    @classmethod
+    def equal(cls, mol1, mol2):
+        """
+        Returns True if both arguments are ``cctk.Molecule`` objects whose atomic numbers, geometry,
+        charge, and multiplicity all match exactly. Name (and bonds) are not compared.
+        """
+
+        # anything that is not a Molecule is never equal
+        for candidate in (mol1, mol2):
+            if not isinstance(candidate, cctk.Molecule):
+                return False
+
+        same_atoms = np.array_equal(mol1.atomic_numbers, mol2.atomic_numbers)
+        same_geometry = np.array_equal(mol1.geometry, mol2.geometry)
+        same_charge = mol1.charge == mol2.charge
+        same_multiplicity = mol1.multiplicity == mol2.multiplicity
+
+        # combine the four independent comparisons
+        return all([same_atoms, same_geometry, same_charge, same_multiplicity])
+
+    def assign_connectivity(self, cutoff=0.2, periodic_boundary_conditions=None):
+        """
+        Automatically recalculates bonds based on covalent radii. If two atoms are closer than the sum of their covalent radii + ``cutoff`` Angstroms,
+        then they are considered bonded. All existing bonds are discarded first.
+
+        Args:
+            cutoff (float): the threshold (in Angstroms) for how close two covalent radii must be to be considered bonded
+            periodic_boundary_conditions (np.ndarray): optional 3-element array of translations along x, y, and z; if given, distances to shifted copies are also considered
+
+        Returns:
+            self
+        """
+
+        #### delete all edges (the nodes are kept)
+        self.bonds = nx.create_empty_copy(self.bonds)
+
+        assert isinstance(cutoff, (float, int)), "need cutoff to be numeric!"
+        g = self.geometry.view(np.ndarray)
+
+        dist_matrix = None
+
+        #### all pairwise distances are computed at once -- cdist is SO FAST
+        if periodic_boundary_conditions is None:
+            dist_matrix = cdist(g, g, "euclidean")
+        else:
+            # even 16 cdist calls is faster than any other implementation, i tested it
+            pbc = periodic_boundary_conditions
+            assert isinstance(pbc, np.ndarray) and len(pbc) == 3, "Need 3-element ``np.ndarray`` for PBCs"
+            # shifts by whole translations along x, y, and z (the zero vector is the unshifted geometry)
+            nearby_cells = [
+                [0, 0, 0],
+                [pbc[0], 0, 0],
+                [0, pbc[1], 0],
+                [0, 0, pbc[2]],
+                [pbc[0], pbc[1], 0],
+                [pbc[0], 0, pbc[2]],
+                [0, pbc[1], pbc[2]],
+                [pbc[0], pbc[1], pbc[2]],
+            ]
+            # NOTE(review): shifts are applied as +nc and -nc only, so mixed-sign images such as (+x, -y, 0) are never sampled -- confirm this is intended
+            dist_matrices = [cdist(g, g + np.array(nc), "euclidean") for nc in nearby_cells]
+            dist_matrices += [cdist(g, g - np.array(nc), "euclidean") for nc in nearby_cells]
+            distances_3d = np.stack(dist_matrices)
+            dist_matrix = distances_3d.min(axis=0)
+
+        covalent_radii = {z: get_covalent_radius(z) for z in set(self.atomic_numbers)}
+        radii_by_num = [covalent_radii[z] for z in self.atomic_numbers]
+
+        for i in range(1, self.num_atoms() + 1):
+            r_i = radii_by_num[i-1]
+            for j in range(i + 1, self.num_atoms() + 1):
+                distance = dist_matrix[i-1][j-1]
+                r_j = radii_by_num[j-1]
+
+                # bonded if closer than the summed covalent radii plus ``cutoff`` (RasMol and Chime use a 0.5 A tolerance; the default here is 0.2 A)
+                if distance < (r_i + r_j + cutoff):
+                    self.add_bond(i, j)
+
+        return self
+
+    def check_for_conflicts(self, min_buffer=1, group1=None, group2=None):
+        """
+        Automatically checks for conflicts based on covalent radii. Two atoms clash if they are closer than the sum of their covalent radii minus ``min_buffer``.
+        If `group1` and `group2` are selected, then only pairs with one atom from each group are evaluated.
+
+        Args:
+            min_buffer (float): how far (in Angstroms) below the sum of their covalent radii two atoms may approach before clashing. 1.0 A is default, empirically.
+            group1 (list): atoms to evaluate against `group2` (if `None`, defaults to all atoms)
+            group2 (list): atoms to evaluate against `group1` (if `None`, defaults to all atoms)
+
+        Returns:
+            True if there are no conflicts
+            False if any evaluated pair of atoms is clashing
+        """
+
+        # a missing group defaults to every atom in the molecule
+        if group1 is None:
+            group1 = list(range(1, self.num_atoms() + 1))
+
+        if group2 is None:
+            group2 = list(range(1, self.num_atoms() + 1))
+
+        for atom in group1 + group2:
+            self._check_atom_number(atom)
+
+        for i in group1:
+            # an atom cannot clash with itself
+            partners = [j for j in group2 if j != i]
+            for j in partners:
+                separation = self.get_distance(i, j, check=False)
+                radii_sum = get_covalent_radius(self.get_atomic_number(i)) + get_covalent_radius(self.get_atomic_number(j))
+
+                # overlapping by more than ``min_buffer`` Angstroms counts as a clash
+                if separation < (radii_sum - min_buffer):
+                    return False
+
+        # no evaluated pair was close enough to clash
+        return True
+
+    def add_bond(self, atom1, atom2, bond_order=1, check=True):
+        """
+        Adds a new bond to the bond graph, or updates the order of an existing bond (an order of 0 deletes it). Will not throw an error if the bond already exists.
+
+        Args:
+            atom1 (int): the number of the first atom
+            atom2 (int): the number of the second atom
+            bond_order (int): bond order of bond between atom1 and atom2
+            check (Bool): whether to validate the atom numbers and the bond order first
+        """
+        if check:
+            self._check_atom_number(atom1)
+            self._check_atom_number(atom2)
+            assert isinstance(bond_order, int), f"bond order {bond_order} must be an integer"
+            assert bond_order >= 0, f"bond order {bond_order} must be positive"
+
+        if not self.bonds.has_edge(atom1, atom2):
+            if bond_order > 0:
+                self.bonds.add_edge(atom1, atom2, weight=bond_order)
+        elif bond_order == 0:
+            self.bonds.remove_edge(atom1, atom2)
+        elif self.bonds[atom1][atom2]["weight"] != bond_order:
+            self.bonds[atom1][atom2]["weight"] = bond_order
+
+    def remove_bond(self, atom1, atom2):
+        """
+        Deletes the bond between ``atom1`` and ``atom2`` (if any) by setting its bond order to 0 through ``self.add_bond()``.
+        """
+        self.add_bond(atom1, atom2, 0)
+
+    def _check_atom_number(self, number):
+        """
+        Helper method which asserts that ``number`` is a (Python or ``numpy``) integer naming an existing atom, i.e. within 1..``self.num_atoms()``.
+        """
+        assert isinstance(number, (int, np.integer)), "atom number must be integer"
+        assert 0 < number <= self.num_atoms(), f"atom number {number} too large! (or too small - needs to be >0)"
+
+    def formula(self, return_dict=False):
+        """
+        Returns the atomic formula.
+
+        If ``return_dict`` is ``True``, then returns a ``dictionary`` with keys elemental symbols and values the number of occurrences
+        (keys appear in the order in which the elements are first met in ``self.atomic_numbers``).
+
+        For instance, ``water.formula(return_dict=True)`` would return ``{'O': 1, 'H': 2}``.
+
+        If ``return_dict`` is ``False``, then returns a stringified version of the formula:
+        carbon first, then hydrogen, then every other element in alphabetical order.
+        Each symbol is followed by its count, even when that count is 1.
+
+        For instance, ``water.formula()`` would return ``H2O1``.
+
+        Args:
+            return_dict (Bool): if the method should return a string or a dictionary
+
+        Returns:
+            a dictionary or string representing the molecule's formula
+        """
+
+        #### tally the occurrences of every element symbol
+        counts = {}
+        for number in self.atomic_numbers:
+            symbol = get_symbol(number)
+            counts[symbol] = counts.get(symbol, 0) + 1
+
+        #### the comparison with True is kept as-is, so only True (or a value equal to it) selects the dictionary
+        if return_dict == True:
+            return counts
+
+        #### C and H always come first (in that order), when present
+        leading = []
+        for symbol in ("C", "H"):
+            if symbol in counts:
+                leading.append(symbol)
+        trailing = sorted(symbol for symbol in counts if symbol not in ("C", "H"))
+
+        #### glue the "symbol + count" pieces together
+        pieces = []
+        for symbol in leading + trailing:
+            pieces.append(f"{symbol}{counts[symbol]}")
+
+        return "".join(pieces)
+
+    def _get_bond_fragments(self, atom1, atom2):
+        """
+        Returns the pieces of a molecule that one would obtain by breaking the bond between two atoms. Will throw ``ValueError`` if the atoms are in a ring or are not bonded.
+        Useful for distance/angle/dihedral scans -- one fragment can be moved and the other held constant.
+
+        Args:
+            atom1 (int): the number of the first atom
+            atom2 (int): the number of the second atom
+
+        Returns:
+            fragment1: the list of atoms in fragment 1 (containing atom1)
+            fragment2: the list of atoms in fragment 2 (containing atom2)
+
+        """
+
+        self._check_atom_number(atom1)
+        self._check_atom_number(atom2)
+
+        assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+        # remember the bond order so that the temporarily removed bond can be put back afterwards
+        bond_order = self.get_bond_order(atom1, atom2)
+        if self.bonds.has_edge(atom1, atom2):
+            self.bonds.remove_edge(atom1, atom2)
+            # with the bond removed, the connected components of the graph are the candidate fragments
+            fragments = nx.connected_components(self.bonds)
+            fragment1 = []
+            fragment2 = []
+
+            for fragment in fragments:
+                if atom1 in fragment:
+                    if atom2 in fragment:
+                        self.add_bond(atom1, atom2, bond_order) # not adding back this bond causes some pretty pernicious errors
+                        raise ValueError(f"Atom {atom1} and atom {atom2} are in a ring or otherwise connected!")
+                    else:
+                        fragment1 = fragment
+                if atom2 in fragment:
+                    fragment2 = fragment
+            # restore the bond that was removed above before returning
+            self.add_bond(atom1, atom2, bond_order)
+            return list(fragment1), list(fragment2)
+        else:
+            raise ValueError(f"No bond between atom {atom1} and atom {atom2}!")
+
+    def _get_fragment_containing(self, atom):
+        """
+        Finds the connected component of the bond graph that includes atom number ``atom``.
+
+        Args:
+            atom (int): the number of the atom
+
+        Returns:
+            a list of all the atoms in the fragment (``None`` if no component contains ``atom``)
+        """
+
+        self._check_atom_number(atom)
+
+        # lazily scan the components and stop at the first one containing the atom
+        components = nx.connected_components(self.bonds)
+        containing = (list(component) for component in components if atom in component)
+
+        return next(containing, None)
+
+    def set_distance(self, atom1=None, atom2=None, distance=None, move="group", atoms=None):
+        """
+        Adjusts the ``atom1`` -- ``atom2`` bond length to be a fixed distance by moving atom2 along the ``atom1`` -> ``atom2`` direction.
+        If ``move`` is "group", the whole fragment containing ``atom2`` is moved as well; if ``move`` is "atom", only atom2 is moved.
+
+        Args:
+            atom1 (int): the number of the first atom
+            atom2 (int): the number of the second atom
+            distance (float): distance in Angstroms of the final bond (any non-negative float, int, or ``numpy`` number)
+            move (str): determines how fragment moving is handled
+            atoms (list): 2-element list of atom numbers (used when ``atom1`` and ``atom2`` are both ``None``)
+
+        Returns:
+            the Molecule object
+
+        Raises:
+            ValueError: if ``distance`` is negative, ``move`` is not recognized, both atoms would move together, or the final distance is wrong
+        """
+
+        if (atom1 is None) and (atom2 is None):
+            assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!"
+            assert len(atoms) == 2, "need 2 atom numbers to set distance"
+            atom1 = atoms[0]
+            atom2 = atoms[1]
+
+        assert isinstance(distance, (float, int, np.number)), "need distance to set distance"
+
+        self._check_atom_number(atom1)
+        self._check_atom_number(atom2)
+        # the assert above already guarantees a numeric type, so only the sign is left to check
+        if distance < 0:
+            raise ValueError(f"invalid value {distance} for distance!")
+
+        atoms_to_move = []
+        if move == "group":
+            if self.get_bond_order(atom1, atom2):
+                _, atoms_to_move = self._get_bond_fragments(atom1, atom2)
+            else:
+                atoms_to_move = self._get_fragment_containing(atom2)
+        elif move == "atom":
+            atoms_to_move = [atom2]
+        else:
+            raise ValueError(f"Invalid option {move} for parameter 'move'!")
+
+        if (atom1 in atoms_to_move and atom2 in atoms_to_move) and move == "group":
+            raise ValueError('both our atoms are connected which will preclude any movement with ``move`` set to "group"')
+
+        current_distance = self.get_distance(atom1, atom2)
+
+        v1 = self.get_vector(atom1)
+        v2 = self.get_vector(atom2)
+        vb = v2 - v1
+
+        if np.linalg.norm(vb) - current_distance > 0.00001:
+            raise ValueError("Error calculating bond distance!")
+
+        #### move all the atoms by the same displacement along the atom1 -> atom2 direction
+        delta = distance - current_distance
+        unitv = compute_unit_vector(vb)
+        for atom in atoms_to_move:
+            self.geometry[atom] = self.geometry[atom] + (delta * unitv)
+
+        #### check everything worked okay...
+        v1f = self.get_vector(atom1)
+        v2f = self.get_vector(atom2)
+        vbf = v2f - v1f
+        # the absolute deviation is tested, so that ending up too short is caught as well as too long
+        if abs(np.linalg.norm(vbf) - distance) > 0.001:
+            new_dist = np.linalg.norm(vbf)
+            raise ValueError(f"Error moving bonds -- new distance is {new_dist:.3f}. Operation failed!")
+
+        return self
+
+    def set_angle(self, atom1=None, atom2=None, atom3=None, angle=None, move="group", atoms=None):
+        """
+        Adjusts the ``atom1`` -- ``atom2`` -- ``atom3`` bond angle to be a fixed value by moving ``atom3``.
+
+        If `move` is set to "group", then the whole fragment containing ``atom3`` will also be moved.
+
+        If `move` is set to "atom", then only ``atom3`` will be moved.
+
+        Args:
+            atom1 (int): the number of the first atom
+            atom2 (int): the number of the second atom
+            atom3 (int): the number of the third atom
+            angle (float): final value in degrees of the ``atom1`` -- ``atom2`` -- ``atom3`` angle
+            move (str): determines how fragment moving is handled
+            atoms (list): 3-element list of atom numbers
+
+        Returns:
+            the Molecule object (also when the angle already has the requested value)
+        """
+
+        if (atom1 is None) and (atom2 is None) and (atom3 is None) :
+            assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!"
+            assert len(atoms) == 3, "need 3 atom numbers to set angle"
+            atom1 = atoms[0]
+            atom2 = atoms[1]
+            atom3 = atoms[2]
+
+        assert isinstance(angle, (float, int, np.number)), "need angle to set angle"
+
+        self._check_atom_number(atom1)
+        self._check_atom_number(atom2)
+        self._check_atom_number(atom3)
+
+        if self.get_distance(atom1, atom2) < 0.01:
+            raise ValueError(f"atom {atom1} and atom {atom2} are too close!")
+
+        if self.get_distance(atom2, atom3) < 0.01:
+            raise ValueError(f"atom {atom2} and atom {atom3} are too close!")
+
+        if self.get_distance(atom1, atom3) < 0.01:
+            raise ValueError(f"atom {atom1} and atom {atom3} are too close!")
+
+        try:
+            angle = float(angle)
+        except Exception as e:
+            raise TypeError(f"angle {angle} cannot be converted to float!")
+
+        if (angle < 0) or (angle > 360):
+            raise ValueError(f"invalid value {angle} for angle!")
+
+        atoms_to_move = []
+        if move == "group":
+            if self.get_bond_order(atom2, atom3):
+                _, atoms_to_move = self._get_bond_fragments(atom2, atom3)
+            elif self.are_connected(atom2, atom3):
+                raise ValueError(
+                    f"atom {atom2} and atom {atom3} are connected but not bonded -- cannot adjust angle! try manually removing one or more bonds."
+                )
+            else:
+                atoms_to_move = self._get_fragment_containing(atom3)
+        elif move == "atom":
+            atoms_to_move = [atom3]
+        else:
+            raise ValueError(f"Invalid option {move} for parameter 'move'!")
+
+        if atom1 in atoms_to_move:
+            raise ValueError(
+                f"atom {atom1} and atom {atom3} are connected in multiple ways -- cannot adjust angle! try manually removing one or more bonds."
+            )
+
+        current_angle = self.get_angle(atom1, atom2, atom3)
+        delta = angle - current_angle
+
+        #### nothing to do -- but still hand back the molecule, as on every other successful path
+        if np.abs(delta) < 0.001:
+            return self
+
+        #### now the real work begins...
+
+        #### move everything to place atom2 at the origin
+        v2 = self.get_vector(atom2)
+        self.translate_molecule(-v2)
+
+        v1 = self.get_vector(atom1)
+        v3 = self.get_vector(atom3)
+
+        #### perform the actual rotation, about the normal of the plane spanned by atom1 and atom3 (as seen from atom2)
+        rot_axis = np.cross(v1, v3)
+        rot_matrix = compute_rotation_matrix(rot_axis, delta)
+        for atom in atoms_to_move:
+            self.geometry[atom] = np.dot(rot_matrix, self.get_vector(atom))
+
+        #### and move it back!
+        self.translate_molecule(v2)
+
+        final_angle = self.get_angle(atom1, atom2, atom3)
+
+        #### need to compare cosines to prevent insidious phase difficulties (like 0.00 and 359.99)
+        if np.abs(math.cos(math.radians(final_angle)) - math.cos(math.radians(angle))) > 0.001:
+            raise ValueError(f"Error rotating atoms -- expected angle {angle}, got {final_angle}  -- operation failed!")
+
+        return self
+
+    def set_dihedral(self, atom1=None, atom2=None, atom3=None, atom4=None, dihedral=None, move="group34", check_result=True, atoms=None):
+        """
+        Adjusts the ``atom1`` -- ``atom2`` -- ``atom3`` -- ``atom4`` dihedral angle to be a fixed value by rotating atom 4 about the ``atom2`` -- ``atom3`` axis.
+
+        If ``move`` is set to "atom", then only ``atom4`` will be moved.
+
+        If ``move`` is set to "group4", then all atoms bonded to ``atom4`` will also be moved.
+
+        If ``move`` is set to "group34", then all atoms bonded to ``atom3`` and ``atom4`` will also be moved.
+
+        Args:
+            atom1 (int): the number of the first atom
+            atom2 (int): the number of the second atom
+            atom3 (int): the number of the third atom
+            atom4 (int): the number of the fourth atom
+            dihedral (float): final value in degrees of the ``atom1`` -- ``atom2`` -- ``atom3`` -- ``atom4`` angle
+            move (str): determines how fragment moving is handled
+            check_result (Bool): whether the final answer should be checked for correctness
+            atoms (list): 4-element list of atom numbers (used when ``atom1`` through ``atom4`` are all ``None``)
+
+        Returns:
+            the Molecule object
+        """
+
+        if (atom1 is None) and (atom2 is None) and (atom3 is None) and (atom4 is None):
+            assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!"
+            assert len(atoms) == 4, "need 4 atom numbers to set dihedral"
+            atom1 = atoms[0]
+            atom2 = atoms[1]
+            atom3 = atoms[2]
+            atom4 = atoms[3]
+
+        assert isinstance(dihedral, (float, int, np.number)), "need angle to set dihedral angle"
+
+        # check atom numbers
+        self._check_atom_number(atom1)
+        self._check_atom_number(atom2)
+        self._check_atom_number(atom3)
+        self._check_atom_number(atom4)
+
+        # check there is bond connectivity information -- NOTE(review): len() of an nx.Graph counts nodes, not edges; confirm this is the intended test
+        assert len(self.bonds) > 0, "no bond connectivity information"
+
+        # check for collinearity
+        angle = self.get_angle(atom1, atom2, atom3, check=False)
+        assert 0.0001 < angle < 179.9999, f"1/2/3 atoms {atom1}-{atom2}-{atom3} are collinear (angle={angle:.8f})"
+        angle = self.get_angle(atom2, atom3, atom4, check=False)
+        assert 0.0001 < angle < 179.9999, f"2/3/4 atoms {atom2}-{atom3}-{atom4} are collinear (angle={angle:.8f})"
+
+        for x in [atom1, atom2, atom3, atom4]:
+            for y in [atom1, atom2, atom3, atom4]:
+                if x <= y:
+                    continue
+                else:
+                    if self.get_sq_distance(x, y, check=False) < 0.001:
+                        raise ValueError(f"atom {x} and atom {y} are too close!")
+
+        try:
+            dihedral = float(dihedral)
+        except Exception as e:
+            raise TypeError(f"dihedral angle {dihedral} cannot be converted to float!")
+
+        if (not isinstance(dihedral, float)) or ((dihedral < 0) or (dihedral > 360)):
+            raise ValueError(f"invalid value {dihedral} for dihedral angle!")
+
+        atoms_to_move = []
+        if move == "group34":
+            #### add atom3's fragment to atom4
+            if self.get_bond_order(atom2, atom3):
+                _, atoms_to_move = self._get_bond_fragments(atom2, atom3)
+            elif self.are_connected(atom2, atom3):
+                raise ValueError(
+                    f"atom {atom2} and atom {atom3} are connected but not bonded -- cannot adjust dihedral angle! try manually removing one or more bonds."
+                )
+            else:
+                atoms_to_move = self._get_fragment_containing(atom3)
+
+            #### and make sure atom4 is in there too!
+            if atom4 not in atoms_to_move:
+                atoms_to_move += self._get_fragment_containing(atom4)
+        elif move == "group4":
+            if self.get_bond_order(atom3, atom4):
+                _, atoms_to_move = self._get_bond_fragments(atom3, atom4)
+            elif self.are_connected(atom3, atom4):
+                raise ValueError(
+                    f"atom {atom3} and atom {atom4} are connected but not bonded -- cannot adjust dihedral angle! try manually removing one or more bonds."
+                )
+            else:
+                atoms_to_move = self._get_fragment_containing(atom4)
+        elif move == "atom":
+            atoms_to_move = [atom4]
+        else:
+            raise ValueError(f"Invalid option {move} for parameter 'move'!")
+
+        if atom1 in atoms_to_move:
+            raise ValueError(
+                f"atom {atom1} and atom {atom4} are connected in multiple ways -- cannot adjust dihedral angle! try manually removing one or more bonds."
+            )
+
+        if atom2 in atoms_to_move:
+            raise ValueError(
+                f"atom {atom2} and atom {atom4} are connected in multiple ways -- cannot adjust dihedral angle! try manually removing one or more bonds."
+            )
+
+        if atom4 not in atoms_to_move:
+            raise ValueError(f"atom {atom4} is not going to be moved... this operation is doomed to fail!")
+
+        current_dihedral = self.get_dihedral(atom1, atom2, atom3, atom4, check=False)
+        delta = (dihedral - current_dihedral) % 360
+
+        if np.abs(delta) < 0.001:
+            return self
+
+        #### now the real work begins...
+        #### move everything to place atom3 at the origin
+        v3 = self.get_vector(atom3, check=False)
+        self.translate_molecule(-v3)
+
+        #### perform the actual rotation, about the axis pointing from atom2 towards atom3 (now at the origin)
+        rot_matrix = compute_rotation_matrix(-self.get_vector(atom2, check=False), delta)
+
+        for atom in atoms_to_move:
+            self.geometry[atom] = np.dot(rot_matrix, self.get_vector(atom, check=False))
+
+        #### and move it back!
+        self.translate_molecule(v3)
+
+        if check_result:
+            final_dihedral = self.get_dihedral(atom1, atom2, atom3, atom4, check=False)
+
+            #### need to compare cosines to prevent insidious phase difficulties (like 0.00 and 359.99)
+            #### this will throw ValueError for differences of about 2 degrees
+            if np.abs(math.cos(math.radians(final_dihedral)) - math.cos(math.radians(dihedral))) > 0.001:
+                raise ValueError(f"Error rotating atoms -- expected dihedral angle {dihedral}, got {final_dihedral}  -- operation failed!")
+
+        return self
+
+    def translate_molecule(self, vector):
+        """
+        Shift every atom of this molecule by ``vector``.
+
+        The geometry array is updated in place.
+
+        Args:
+            vector (vector): displacement added to each atom's coordinates
+
+        Returns:
+            the Molecule object
+        """
+        #### one broadcasted in-place add replaces a per-atom loop
+        self.geometry += vector
+
+        return self
+
+    def rotate_molecule(self, axis, degrees):
+        """
+        Rotates the whole molecule about ``axis`` by applying one rotation matrix to each atom's coordinates.
+
+        Args:
+            axis (vector): the vector to rotate about
+            degrees (float): how much to rotate (in degrees)
+
+        Returns:
+            the Molecule object
+        """
+        rotation = compute_rotation_matrix(axis, degrees)
+        n_atoms = self.num_atoms()
+        for index in range(1, n_atoms + 1):
+            self.geometry[index] = np.dot(rotation, self.geometry[index])
+
+        return self
+
+    def calculate_mass_spectrum(self, **kwargs):
+        """
+        Generates list of m/z values; weights are scaled so the largest peak is 1.
+
+        Final masses rounded to one decimal point (because of low-res MS). ``kwargs`` go to ``_recurse_through_formula``.
+        """
+        #### wide ints: np.int8 counters wrap around once an element occurs more than 127 times
+        form_vec = np.zeros(shape=92, dtype=np.int64)
+        for z in self.atomic_numbers:
+            form_vec[z] += 1
+
+        masses, weights = _recurse_through_formula(form_vec, [0], [1], **kwargs)
+
+        new_masses, indices = np.unique(np.round(masses, decimals=1), return_inverse=True)
+        new_weights = np.zeros_like(new_masses)
+        for k in range(len(new_weights)):
+            new_weights[k] = np.sum(weights[np.nonzero(indices == k)])
+        new_weights = new_weights / np.max(new_weights)
+        return new_masses, new_weights
+
+    def add_atom_at_centroid(self, symbol, atom_numbers, weighted=False):
+        """
+        Adds atom with symbol ``symbol`` at the centroid of the atoms in ``atom_numbers``.
+
+        If ``weighted`` is true, each position is weighted by that atom's atomic number (placing the new atom closer to heavier atoms).
+
+        Otherwise, the average is unweighted.
+
+        Args:
+            symbol (str): the atomic symbol of the atom to be added
+            atom_numbers (list): which atoms to put the new atom between
+            weighted (Bool): if the centroid calculation should be weighted (see above)
+
+        Returns:
+            the Molecule object
+
+        Raises:
+            TypeError: if ``atom_numbers`` is not a list of at least two atoms, or ``symbol`` is not a string
+        """
+        if (not isinstance(atom_numbers, list)) or (len(atom_numbers) < 2):
+            raise TypeError("atom_numbers must be list with at least two elements")
+
+        if not isinstance(symbol, str):
+            raise TypeError(f"symbol {symbol} must be a string!")
+
+        for atom in atom_numbers:
+            self._check_atom_number(atom)
+
+        #### numbers were validated above, so skip the second check inside get_vector
+        coords = [self.get_vector(atom, check=False) for atom in atom_numbers]
+        weights = [self.atomic_numbers[atom] if weighted else 1 for atom in atom_numbers]
+        new_coord = list(np.average(coords, weights=weights, axis=0))
+        return self.add_atom(coordinates=new_coord, symbol=symbol)
+
+    def add_atom(self, symbol, coordinates):
+        """
+        Append a new atom with symbol ``symbol`` at position ``coordinates``; it becomes the last atom and gets a bond-graph node with no bonds.
+
+        Args:
+            symbol (str): symbol of the atom (e.g. "Cl", "Ar", "C")
+            coordinates (list): the three coordinates of the new atom (list or Numpy array)
+
+        Returns:
+            the Molecule object
+        """
+        if not (isinstance(coordinates, (list, np.ndarray)) and (len(coordinates) == 3)):
+            raise TypeError("coordinates must be list with three elements")
+
+        if not isinstance(symbol, str):
+            raise TypeError(f"symbol {symbol} must be a string!")
+
+        atomic_number = get_number(symbol)
+        numbers = np.append(self.atomic_numbers, [atomic_number]).astype(np.int8)
+        self.atomic_numbers = numbers.view(cctk.OneIndexedArray)
+        positions = np.append(self.geometry, [coordinates], axis=0)
+        self.geometry = positions.view(cctk.OneIndexedArray)
+        self.bonds.add_node(self.num_atoms())
+        return self
+
+    def remove_atom(self, number):
+        """
+        Remove the atom with number ``number``; the bond graph is then relabeled from 1 to fill the gap.
+
+        Args:
+            number (int): number of the atom
+
+        Returns:
+            the Molecule object
+
+        Raises:
+            ValueError: if any step of the removal fails
+        """
+        self._check_atom_number(number)
+
+        try:
+            self.bonds.remove_node(number)
+            self.geometry = np.delete(self.geometry, number - 1, axis=0).view(cctk.OneIndexedArray)
+            self.atomic_numbers = np.delete(self.atomic_numbers, number - 1).view(cctk.OneIndexedArray)
+            #### need to renumber to fill gaps
+            self.bonds = nx.convert_node_labels_to_integers(self.bonds, first_label=1, ordering="sorted")
+            return self
+        except Exception as e:
+            raise ValueError(f"removing atom {number} failed!") from e
+
+    def get_atomic_number(self, atom):
+        """
+        Look up the atomic number of one atom.
+
+        Args:
+            atom (int): number of the atom to look up
+        Returns:
+            atomic_number (int): the atomic number of that atom
+        """
+        self._check_atom_number(atom)
+        atomic_number = self.atomic_numbers[atom]
+        return atomic_number
+
+    def get_atomic_symbol(self, atom):
+        """
+        Look up the atomic symbol of one atom.
+        The atom number is validated inside ``get_atomic_number``.
+
+        Args:
+            atom (int): number of the atom to look up
+
+        Returns:
+            atomic_symbol (str): the atomic symbol of that atom
+        """
+        return get_symbol(self.get_atomic_number(atom))
+
+    def get_atomic_symbols(self):
+        """
+        Collect the atomic symbol of every atom, in atom-number order.
+
+        Returns:
+            atomic_symbols (cctk.OneIndexedArray): the atomic symbols
+        """
+        #### atom numbers are 1-indexed, hence the shifted range
+        symbols = [self.get_atomic_symbol(number) for number in range(1, self.get_n_atoms() + 1)]
+        return cctk.OneIndexedArray(symbols)
+
+    def get_n_atoms(self):
+        """
+        Count the atoms in this Molecule (same value as ``num_atoms``).
+
+        Returns:
+            n_atoms (int): the number of atoms
+        """
+        return self.num_atoms()
+
+    def get_vector(self, atom, atom2=None, check=True):
+        """
+        Get the position vector of ``atom``. If ``atom2`` is also given, returns the vector pointing from ``atom2`` to ``atom`` instead.
+        ``mol.get_vector(atom)`` is thus equivalent to ``mol.get_vector(atom, origin)``.
+
+        Args:
+            atom (int): number of the first atom
+            atom2 (int): number of the second atom (optional)
+            check (Bool): whether to validate input data (can be overridden to prevent slow double-checking)
+
+        Returns:
+            a Numpy array
+        """
+        if check:
+            self._check_atom_number(atom)
+
+        if not atom2:
+            return self.geometry[atom].view(np.ndarray)
+        if check:
+            self._check_atom_number(atom2)
+        difference = self.geometry[atom] - self.geometry[atom2]
+        return difference.view(np.ndarray)
+
+    def get_distance(self, atom1=None, atom2=None, check=True, _dist=compute_distance_between, atoms=None):
+        """
+        Wrapper to compute distance between two atoms, given either as ``atom1``/``atom2`` or as a two-element ``atoms`` list.
+
+        This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_dist``).
+
+        Args:
+            atom1 (int): number of the first atom
+            atom2 (int): number of the second atom
+            check (Bool): whether to validate input data (can be overridden to prevent slow double-checking)
+            _dist (function): function used to compute distance
+            atoms (list): list of atom numbers, read only when ``atom1`` and ``atom2`` are both ``None``
+
+        Returns:
+            the distance, in Angstroms
+        """
+        if (atom1 is None) and (atom2 is None):
+            assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!"
+            assert len(atoms) == 2, "need 2 atom numbers to get distance"
+            atom1, atom2 = atoms[0], atoms[1]
+
+        if check:
+            try:
+                atom1, atom2 = int(atom1), int(atom2)
+            except Exception as e:
+                raise TypeError("atom numbers cannot be cast to int!")
+
+            for number in (atom1, atom2):
+                self._check_atom_number(number)
+
+        first = self.get_vector(atom1, check=False)
+        second = self.get_vector(atom2, check=False)
+        return _dist(first, second)
+
+    def get_sq_distance(self, atom1, atom2, check=True):
+        """
+        Wrapper to compute the squared distance between two atoms (no square root is taken) -- optimized for speed!
+
+        Args:
+            atom1 (int): number of the first atom
+            atom2 (int): number of the second atom
+            check (Bool): whether to validate input data (can be overridden to prevent slow double-checking)
+
+        Returns:
+            the squared distance
+        """
+        if check:
+            try:
+                atom1, atom2 = int(atom1), int(atom2)
+            except Exception as e:
+                raise TypeError("atom numbers cannot be cast to int!")
+
+            for number in (atom1, atom2):
+                self._check_atom_number(number)
+
+        separation = self.get_vector(atom1, atom2, check=False)
+        return np.sum(np.square(separation))
+
+    def get_angle(self, atom1=None, atom2=None, atom3=None, check=True, _angle=compute_angle_between, atoms=None):
+        """
+        Wrapper to compute angle between three atoms (``atom2`` is the vertex).
+
+        This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_angle``).
+
+        The atoms can be passed individually or as a three-element ``atoms`` list.
+
+        Args:
+            atom1 (int): number of the first atom
+            atom2 (int): number of the second atom
+            atom3 (int): number of the third atom
+            check (Bool): whether to validate input data (can be overridden to prevent slow double-checking)
+            _angle (function): function used to compute angle
+            atoms (list): list of atom numbers
+
+        Returns:
+            the angle, in degrees
+
+        Raises:
+            TypeError: if ``check`` is true and an atom number cannot be cast to int
+        """
+        if (atom1 is None) and (atom2 is None) and (atom3 is None):
+            assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!"
+            assert len(atoms) == 3, "need 3 atom numbers to get angle"
+            atom1, atom2, atom3 = atoms[0], atoms[1], atoms[2]
+
+        if check:
+            try:
+                atom1, atom2, atom3 = int(atom1), int(atom2), int(atom3)
+            except Exception as e:
+                raise TypeError("atom numbers cannot be cast to int!")
+
+            for number in (atom1, atom2, atom3):
+                self._check_atom_number(number)
+
+        #### both arms of the angle are measured from the central atom (atom2)
+        vertex = self.get_vector(atom2, check=False)
+        arm1 = self.get_vector(atom1, check=False) - vertex
+        arm2 = self.get_vector(atom3, check=False) - vertex
+        return _angle(arm1, arm2)
+
+    def get_dihedral(self, atom1=None, atom2=None, atom3=None, atom4=None, check=True, _dihedral=compute_dihedral_between, atoms=None):
+        """
+        Wrapper to compute dihedral angle between four atoms.
+
+        This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_dihedral``).
+
+        The atoms can be passed individually or as a four-element ``atoms`` list.
+
+        Args:
+            atom1 (int): number of the first atom
+            atom2 (int): number of the second atom
+            atom3 (int): number of the third atom
+            atom4 (int): number of the fourth atom
+            check (Bool): whether to validate input data (can be overridden to prevent slow double-checking)
+            _dihedral (function): function used to compute dihedral
+            atoms (list): list of atom numbers
+
+        Returns:
+            the dihedral angle, in degrees
+
+        Raises:
+            TypeError: if ``check`` is true and an atom number cannot be cast to int
+        """
+        if (atom1 is None) and (atom2 is None) and (atom3 is None) and (atom4 is None):
+            assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!"
+            assert len(atoms) == 4, "need 4 atom numbers to get dihedral angle"
+            atom1, atom2 = atoms[0], atoms[1]
+            atom3, atom4 = atoms[2], atoms[3]
+
+        if check:
+            try:
+                atom1, atom2 = int(atom1), int(atom2)
+                atom3, atom4 = int(atom3), int(atom4)
+            except Exception as e:
+                raise TypeError("atom numbers cannot be cast to int!")
+
+            for number in (atom1, atom2, atom3, atom4):
+                self._check_atom_number(number)
+
+        #### the four position vectors, in the order the atoms were given
+        points = [
+            self.get_vector(number, check=False)
+            for number in (atom1, atom2, atom3, atom4)
+        ]
+
+        return _dihedral(*points)
+
+    def get_bond_order(self, atom1, atom2):
+        """
+        Wrapper to get bond order between two atoms.
+
+        Args:
+            atom1 (int): number of the first atom
+            atom2 (int): number of the second atom
+
+        Returns:
+            the bond order (the ``weight`` stored on the bond-graph edge), or 0 if there is no edge between the atoms
+        """
+        self._check_atom_number(atom1)
+        self._check_atom_number(atom2)
+
+        if not self.bonds.has_edge(atom1, atom2):
+            return 0
+
+        return self.bonds[atom1][atom2]["weight"]
+
+    def are_connected(self, atom1, atom2):
+        """
+        Tells whether two atoms are connected, i.e. whether ``atom1`` belongs to the fragment containing ``atom2``.
+
+        Returns ``True`` or ``False``.
+        """
+        self._check_atom_number(atom1)
+        self._check_atom_number(atom2)
+
+        fragment = self._get_fragment_containing(atom2)
+        return atom1 in fragment
+
+    def get_atoms_by_symbol(self, symbol):
+        """
+        Returns all the numbers of atoms of type ``symbol`` in the molecule.
+
+        Args:
+            symbol (str): atomic symbol to search for (e.g. "C")
+
+        Returns:
+            list of 1-indexed atom numbers whose atomic number matches ``symbol``
+        """
+        if not isinstance(symbol, str):
+            raise TypeError(f"symbol {symbol} must be a string")
+
+        number = get_number(symbol)
+        return [index for index, atom in enumerate(self.atomic_numbers, start=1) if atom == number]
+
+    def get_heavy_atoms(self):
+        """
+        Lists the heavy atoms of the molecule (everything except hydrogen), for array indexing.
+
+        Returns:
+            list of 1-indexed atom numbers whose atomic number is not 1
+        """
+        hydrogen = 1
+        return [
+            index for index, z in enumerate(self.atomic_numbers, start=1) if z != hydrogen
+        ]
+
+    def get_adjacent_atoms(self, atom):
+        """
+        Lists the atoms bonded to ``atom`` (its neighbors in the bond graph). An atom with no neighbors gives an empty list.
+        """
+        try:
+            atom = int(atom)
+        except Exception as e:
+            raise TypeError(f"atom number {atom} cannot be cast to int!")
+
+        self._check_atom_number(atom)
+        neighbors = self.bonds.neighbors(atom)
+        return list(neighbors)
+
+    def num_atoms(self):
+        return len(self.atomic_numbers)  #### number of atoms (same value as ``get_n_atoms``)
+
+    def rms_distance_between_atoms(self):
+        """
+        Returns a quick, easy-to-calculate proxy (in Angstroms) for minimizing steric clashes, based on every pairwise distance.
+        The value is the square root of the summed squared distances over all ordered pairs, divided by the number of atoms.
+        """
+        n_atoms = self.num_atoms()
+        total = 0
+        for i in range(1, n_atoms + 1):
+            for j in range(1, n_atoms + 1):
+                if i != j:
+                    total += self.get_distance(i, j) ** 2
+        return math.sqrt(total) / n_atoms
+
+    def optimize_dihedral(self, atom1, atom2, atom3, atom4, step=10):
+        """
+        Scans the given dihedral from 0 to 360 degrees and keeps the angle that maximizes ``self.rms_distance_between_atoms()``.
+        A cheap alternative to geometry optimization using *ab initio* methods or density functional theory.
+
+        Args:
+            atom1 (int): atom number of the first atom in the dihedral
+            atom2 (int): atom number of the second atom in the dihedral
+            atom3 (int): atom number of the third atom in the dihedral
+            atom4 (int): atom number of the fourth atom in the dihedral
+            step (int): explore angles from 0 to 360 with this stepsize in degrees (passed to ``range``)
+
+        Returns:
+            the final value of the angle
+        """
+        for number in (atom1, atom2, atom3, atom4):
+            self._check_atom_number(number)
+
+        best_angle = 0
+        best_dist = 0
+
+        for angle in range(0, 360, step):
+            self.set_dihedral(atom1, atom2, atom3, atom4, angle)
+            #### score each geometry once -- the pairwise sum is quadratic in the number of atoms
+            current_dist = self.rms_distance_between_atoms()
+            if current_dist > best_dist:
+                best_dist = current_dist
+                best_angle = angle
+
+        self.set_dihedral(atom1, atom2, atom3, atom4, best_angle)
+        return best_angle
+
+    def atom_string(self, atom):
+        """
+        Builds a label from the elemental symbol and the atom number of a given atom.
+
+        For example, ``methane.atom_string(1)`` might return "C1".
+
+        Args:
+            atom (int): number of the atom
+
+        Returns:
+            the label, as a string
+        """
+        try:
+            atom = int(atom)
+        except Exception as e:
+            raise ValueError("atom cannot be cast to int")
+
+        self._check_atom_number(atom)
+        symbol = get_symbol(self.atomic_numbers[atom])
+        return f"{symbol}{atom}"
+
+    def perturb(self, size=0.005):
+        """
+        This function can be used to generate a slightly different molecule in cases where numerical (or geometric) convergence is problematic.
+
+        It adds a random variable (sampled from a normal distribution, centered at 0 with stddev ``size``) to every number in ``self.geometry``.
+
+        Args:
+            size (float): stddev of the normal distribution
+
+        Returns:
+            the Molecule object
+        """
+        noise = np.random.normal(scale=size, size=self.geometry.shape)
+
+        #### rebinds ``self.geometry`` to a new array instead of updating the old one in place
+        self.geometry = self.geometry + noise
+        return self
+
+    def center(self):
+        """
+        Moves the centroid (unweighted mean of all atomic positions) to the origin and returns the Molecule object.
+        """
+        every_atom = np.arange(1, self.num_atoms() + 1)
+        centroid = self.geometry[every_atom].mean(axis=0)
+        return self.translate_molecule(-centroid)
+
+    @classmethod
+    def combine_molecules(cls, molecule1, molecule2):
+        """
+        Combine two molecules into one final molecule.
+
+        Bonding information is not currently preserved. Charges add, and the unpaired spins of both molecules are assumed to add as well.
+
+        Args:
+            molecule1 (Molecule): 1st molecule
+            molecule2 (Molecule): 2nd molecule
+
+        Returns:
+            new ``Molecule`` object
+        """
+
+        atoms = np.hstack((molecule1.atomic_numbers.T, molecule2.atomic_numbers.T)).view(cctk.OneIndexedArray)
+        geoms = np.vstack((molecule1.geometry, molecule2.geometry)).view(cctk.OneIndexedArray)
+        charge = molecule1.charge + molecule2.charge
+
+        #### total spin S = S1 + S2 with S = (multiplicity - 1) / 2, so 2S + 1 = m1 + m2 - 1;
+        #### integer arithmetic keeps the multiplicity an int instead of a float like 1.0
+        multiplicity = molecule1.multiplicity + molecule2.multiplicity - 1
+
+        return Molecule(atoms, geoms, charge=charge, multiplicity=multiplicity)
+
+    def volume(self, pts_per_angstrom=10, qhull=False):
+        """
+        Returns volume calculated using the Gavezzotti algorithm (JACS, 1983, 105, 5220). Relatively slow.
+        If MemoryError, defaults to a qhull-based approach (accurate in the limit as number of atoms goes to infinity)
+
+        Args:
+            pts_per_angstrom (int): how many grid points to use per Å - time scales as O(n**3) so be careful!
+            qhull (bool): use faster QHull algorithm
+
+        Returns:
+            volume in Å**3
+        """
+        if not qhull:
+            try:
+                assert isinstance(pts_per_angstrom, int), "Need an integer number of pts per Å!"
+                assert pts_per_angstrom > 0, "Need a positive integer of pts per Å!"
+
+                #### bounding box with 4 Å of padding on every side
+                coords = self.geometry.view(np.ndarray)
+                box_max = np.max(coords, axis=0) + 4
+                box_min = np.min(coords, axis=0) - 4
+                extents = box_max - box_min
+                box_volume = extents[0] * extents[1] * extents[2]
+
+                #### one evenly spaced axis per dimension, then every grid point as a row of box_pts
+                axes = [np.linspace(box_min[k], box_max[k], int(extents[k] * pts_per_angstrom)) for k in range(3)]
+                box_pts = np.stack([np.ravel(a) for a in np.meshgrid(*axes)], axis=-1)
+
+                # caching to speed call
+                vdw_radii = {z: get_vdw_radius(z) for z in set(self.atomic_numbers)}
+                radii_per_atom = np.array([vdw_radii[z] for z in self.atomic_numbers]).reshape(-1,1)
+
+                # this is the slow part since it's approximately a zillion operations
+                dists_per_atom = cdist(coords, box_pts)
+                occupied = np.sum(np.max(dists_per_atom < radii_per_atom, axis=0))
+
+                percent_occupied = occupied / box_pts.shape[0]
+                return percent_occupied * box_volume
+            except MemoryError:
+                qhull = True
+
+        if qhull:
+            #### import the class explicitly: a bare ``import scipy`` need not load ``scipy.spatial``
+            from scipy.spatial import ConvexHull
+            hull = ConvexHull(self.geometry.view(np.ndarray))
+            return hull.volume
+
+    def swap_atom_numbers(self, atom1, atom2):
+        """
+        Interchanges the numbers of ``atom1`` and ``atom2`` (atomic numbers, coordinates, and bond-graph labels all trade places).
+
+        Args:
+            atom1 (int): number of 1st atom
+            atom2 (int): number of 2nd atom
+
+        Returns:
+            new ``Molecule`` object (does not modify in-place)
+        """
+        for number in (atom1, atom2):
+            self._check_atom_number(number)
+        mol = copy.deepcopy(self)
+
+        #### read both atoms before writing either; coordinate rows are deep-copied in case indexing returns a view
+        first_z = mol.atomic_numbers[atom1]
+        second_z = mol.atomic_numbers[atom2]
+        first_xyz = copy.deepcopy(mol.geometry[atom1])
+        second_xyz = copy.deepcopy(mol.geometry[atom2])
+
+        mol.atomic_numbers[atom2] = first_z
+        mol.atomic_numbers[atom1] = second_z
+        mol.geometry[atom2] = first_xyz
+        mol.geometry[atom1] = second_xyz
+
+        mol.bonds = nx.relabel_nodes(mol.bonds, {atom2: atom1, atom1: atom2}, copy=True)
+        return mol
+
+    def epimerize(self, center_atom, substituent1, substituent2):
+        """
+        Epimerizes ``center_atom`` by exchanging the groups corresponding to ``substituent1`` and ``substituent2``.
+        Both substituents must be bonded to the center atom, and the center atom must have exactly four neighbors!
+
+        Args:
+            center_atom (int): number of middle atom
+            substituent1 (int): number of 1st atom
+            substituent2 (int): number of 2nd atom
+
+        Returns:
+            new ``Molecule`` object (does not modify in-place)
+        """
+
+        self._check_atom_number(center_atom)
+        self._check_atom_number(substituent1)
+        self._check_atom_number(substituent2)
+
+        assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+
+        adj = self.get_adjacent_atoms(center_atom)
+        assert len(adj) == 4, "center atom must be making 4 bonds!"
+        assert substituent1 in adj, "1st substituent is not bonded to center atom!"
+        assert substituent2 in adj, "2nd substituent is not bonded to center atom!"
+
+        #### remove both substituents; the second call uses mmap1 to translate the original atom numbers
+        mol, group1, mmap1, gmap1  = cctk.Group.remove_group_from_molecule(self, center_atom, substituent1, return_mapping=True)
+        mol, group2, mmap2, gmap2  = cctk.Group.remove_group_from_molecule(mol, mmap1[center_atom], mmap1[substituent2], return_mapping=True)
+        #### the last two atoms -- presumably the placeholders left behind by the two removals (TODO confirm)
+        h1 = mol.num_atoms() - 1
+        h2 = mol.num_atoms()
+
+        #### add them back in the opposite fashion: group2 goes on h1, group1 on h2 (renumbered through mmap3)
+        mol, mmap3, gmap3 =  cctk.Group.add_group_to_molecule(mol, group2, h1, return_mapping=True)
+        mol = cctk.Group.add_group_to_molecule(mol, group1, mmap3[h2])
+
+        #### relabel new graph to match original molecule, checking every stereocenter except the one just inverted
+        which = top.get_stereogenic_centers(self)
+        which.remove(center_atom)
+        return mol.renumber_to_match(self, check_chirality=which)
+
+    def renumber_to_match(self, model, check_chirality="all"):
+        """
+        Renumbers atoms to match ``model`` (must have isomorphic bond graph). Returns a copy of ``self`` with renumbered atoms.
+
+        Args:
+            model (cctk.Molecule): isomorphic molecule to renumber by
+            check_chirality (list of atom numbers): centers to check, to prevent inversion due to graph isomorphism.
+                Alternatively ``None`` will prevent any checking and "all" will use ``cctk.topology.get_exchangeable_centers()``.
+
+        Returns:
+            new ``Molecule`` object
+
+        Raises:
+            ValueError: if no candidate numbering reproduces the chirality of ``model``
+        """
+        assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+
+        #### use networkx to generate mapping
+        #### you need the node matcher to distinguish between e.g. H, F, Cl
+        self._add_atomic_numbers_to_nodes()
+        model._add_atomic_numbers_to_nodes()
+        nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)
+
+        match = nx.algorithms.isomorphism.GraphMatcher(model.bonds, self.bonds, node_match=nm)
+        assert match.is_isomorphic(), "can't renumber non-isomorphic graphs!"
+        new_ordering = [match.mapping[x] for x in range(1, self.num_atoms() + 1)]
+        inv_mapping = {v:k  for k,v in match.mapping.items()} # bit kludgy but works
+
+        #### create renumbered molecule
+        mol = copy.deepcopy(self)
+        mol.atomic_numbers = self.atomic_numbers[new_ordering]
+        mol.geometry = self.geometry[new_ordering]
+        mol.bonds = nx.relabel_nodes(self.bonds, mapping=inv_mapping, copy=True)
+
+        #### ``None`` means "skip the chirality check": return the renumbered copy instead of falling through to the error below
+        if check_chirality is None:
+            return mol
+
+        if check_chirality == "all":
+            check_chirality = top.get_exchangeable_centers(mol)
+
+        #### diastereotopic protons get scrambled by the above code so we gotta go through and fix all of them
+        #### this happens because networkx doesn't store chirality - a known limitation of graph molecular encoding!
+        if isinstance(check_chirality, list):
+            #### find all the differences and exchange them
+            model_report = top.get_chirality_report(model, check_chirality)
+
+            #### generate all meso ring permutations
+            candidates = top.flip_meso_rings(mol, atoms=check_chirality)
+
+            #### for each, try flipping configuration of all centers
+            for candidate in candidates:
+                report = top.get_chirality_report(candidate, check_chirality)
+                for center in check_chirality:
+                    if model_report[center] != report[center]:
+                        try:
+                            candidate = top.exchange_identical_substituents(candidate, center)
+                        except ValueError as e:
+                            break
+
+                #### check that we actually fixed all the problems, and return if we did
+                mol_report = top.get_chirality_report(candidate, check_chirality)
+                if all(mol_report[center] == model_report[center] for center in check_chirality):
+                    return candidate
+
+        raise ValueError("can't get a proper renumbering: are you *sure* these two molecules can have the same chirality?")
+
+    def _add_atomic_numbers_to_nodes(self):
+        """
+        Store each atom's atomic number on its bond-graph node (attribute ``atomic_number``) so graph matching can tell elements apart, e.g. F from H.
+        """
+        nx.set_node_attributes(self.bonds, {number: {"atomic_number": self.atomic_numbers[number]} for number in range(1, self.num_atoms() + 1)})
+
+    def is_atom_in_ring(self, atom):
+        """
+        Returns ``True`` if ``atom`` appears in any cycle of the bond graph's cycle basis (rooted at ``atom``), else ``False``.
+        """
+        assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+        cycles = nx.cycle_basis(self.bonds, root=atom)
+        return any(atom in cycle for cycle in cycles)
+
+    def get_components(self):
+        """
+        Returns a list of all the connected components in a molecule.
+        Each component is a list of the atom numbers (bond-graph nodes) it contains.
+        """
+        assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+        return [list(component) for component in nx.connected_components(self.bonds)]
+
+    def limit_solvent_shell(self, solute=0, num_atoms=0, num_solvents=10, distance_from_atom=None, return_idxs=False):
+        """
+        Automatically detects solvent molecules and removes the farthest ones until you have a set number of solvents or atoms.
+
+        The "distance" between molecules is the minimum of the pairwise atomic distances, to emphasize inner-sphere interactions.
+
+        Args:
+            solute (int): which fragment is the solute, 0-indexed
+            num_atoms (int): remove atoms until there are this number (modulo the size of a solvent molecule)
+            num_solvents (int): remove solvent molecules until there are this number
+            distance_from_atom (int): if you want to find molecules closest to a given atom in the solute, specify the atom number here.
+                if this atom is not in the solute fragment, an exception will be raised.
+            return_idxs (bool): if True, indices of atoms that would be in the new molecule are returned. no change is made to ``self``.
+
+        Returns:
+            new ``Molecule`` object, or a sorted list of the atom numbers to keep if ``return_idxs`` is true
+        """
+        assert isinstance(num_atoms, int)
+        assert isinstance(num_solvents, int)
+
+        fragments = self.get_components()
+        solute_x = self.geometry[fragments[solute]].view(np.ndarray)
+
+        if distance_from_atom:
+            assert distance_from_atom in fragments[solute], f"{distance_from_atom} is not in the solute fragment"
+            solute_x = self.geometry[[distance_from_atom]].view(np.ndarray)
+
+        distances = np.zeros(shape=len(fragments))
+        for i, f in enumerate(fragments):
+            if i == solute:
+                distances[i] = 0
+            else:
+                solvent_x = self.geometry[f].view(np.ndarray)
+                # cdist is absurdly fast
+                pairwise_distances = cdist(solvent_x, solute_x)
+                distances[i] = np.min(pairwise_distances)
+
+        mol = copy.deepcopy(self)
+
+        #### reverse order - farthest away comes first
+        order = np.argsort(distances)[::-1]
+
+        current_num_solvents = len(fragments) - 1
+        current_num_atoms = mol.num_atoms()
+
+        to_remove = []
+        for i in order:
+            to_remove.extend(fragments[i])
+            current_num_atoms -= len(fragments[i])
+            current_num_solvents -= 1
+
+            if current_num_atoms <= num_atoms or num_solvents == current_num_solvents:
+                if return_idxs:
+                    #### atom numbers are 1-indexed, so the range has to run through num_atoms() inclusive
+                    all_idxs = set(range(1, self.num_atoms() + 1))
+                    return sorted(all_idxs - set(to_remove))
+                else:
+                    #### have to remove in reverse direction for indexing consistency
+                    for j in sorted(to_remove, reverse=True):
+                        mol.remove_atom(j)
+                    return mol
+
+    def center_periodic(self, center, side_length):
+        """
+        Translates the geometry so that atom ``center`` sits at the center of a cube, then shifts each bonded component as a unit so that its centroid lies inside the cube.
+
+        For analysis of MD files with periodic boundary conditions. Modifies ``self.geometry`` in place and returns ``self``.
+
+        Args:
+            center (int): number of the atom to center (validated with ``self._check_atom_number()``)
+            side_length (float): length of side, in Å
+        """
+        self._check_atom_number(center)
+        assert isinstance(side_length, (int, float))
+        assert side_length > 0
+
+        #### Center the atom of interest: move it to the origin, then to the middle of the cube
+        self.geometry += -1 * self.geometry[center]
+        self.geometry += side_length / 2
+
+        for f in self.get_components():
+            centroid = np.mean(self.geometry[f], axis=0)
+            self.geometry[f] += -1 * np.floor_divide(centroid, side_length) * side_length  # subtract whole box lengths per axis
+
+        return self
+
+    @classmethod
+    def new_from_name(cls, name):
+        """
+        Create a new ``Molecule`` by fetching the SMILES string for ``name`` from the URL below and passing it to ``new_from_smiles()``; any failure is raised as ``ValueError``.
+        """
+        assert isinstance(name, str)
+        from urllib.parse import quote
+        from urllib.request import urlopen
+        url = 'http://cactus.nci.nih.gov/chemical/structure/' + quote(name) + '/smiles'  # percent-encode the whole name, not only spaces
+        try:
+            with urlopen(url, timeout=5) as response:
+                smiles = response.read().decode('utf8')
+            return cls.new_from_smiles(smiles)
+        except Exception as e:
+            raise ValueError(f"something went wrong auto-generating molecule {name}:\nurl: {url}\n{e}") from e
+
+    @classmethod
+    def new_from_smiles(cls, smiles):
+        """
+        Builds a ``Molecule`` from a SMILES string: ``rdkit`` adds hydrogens, embeds a conformer, and runs an MMFF optimization.
+
+        Raises ``ImportError`` if ``rdkit`` cannot be imported and ``ValueError`` if any of the ``rdkit`` steps fails.
+        """
+        assert isinstance(smiles, str)
+
+        try:
+            from rdkit.Chem import AllChem as Chem
+        except ImportError as e:
+            raise ImportError(f"``rdkit`` must be installed for this function to work!\n{e}")
+
+        try:
+            rdkm = Chem.MolFromSmiles(smiles)
+            rdkm = Chem.AddHs(rdkm)
+            Chem.EmbedMolecule(rdkm)
+            Chem.MMFFOptimizeMolecule(rdkm)
+
+            atomic_nums = [atom.GetAtomicNum() for atom in rdkm.GetAtoms()]
+            positions = rdkm.GetConformers()[0].GetPositions()
+
+            return cls(atomic_nums, positions)
+
+        except Exception as e:
+            raise ValueError(f"something went wrong auto-generating molecule {smiles}:\n{e}")
+
+    def fragment(self):
+        """
+        Splits ``self`` into its bond-connected components.
+
+        Returns:
+            list of ``cctk.Molecule`` objects, one per component from ``self.get_components()``, each after ``assign_connectivity()``
+        """
+        return [
+            cctk.Molecule(self.atomic_numbers[idx], self.geometry[idx]).assign_connectivity() for idx in self.get_components()
+        ]
+
+    def get_symmetric_atoms(self):
+        """
+        Returns lists of symmetric atoms, using the groups yielded by ``cctk.load_groups.group_iterator(symmetric_only=True)``.
+
+        Useful for NMR spectroscopy, etc. Sets that share an atom are merged before being returned.
+        """
+        from cctk.load_groups import group_iterator
+
+        symmetric_sets = []
+        for group in group_iterator(symmetric_only=True):
+            # this gives us a list of dictionaries mapping from self.atomic_numbers to group numbers
+            matches = top.find_group(self, group)
+
+            for m in matches:
+                i = {v: k for k,v in m.items()}  # inverted match: look up by the group's numbering
+                for n in group.isomorphic:
+                    symmetric_sets.append([i[idx] for idx in n])
+
+        #### some groups overlap (e.g. methyl and t-butyl), so now we collapse the overlapping sets
+        for i, s1 in enumerate(symmetric_sets):
+            for j, s2 in enumerate(symmetric_sets[i+1:]):
+                if set(s1).intersection(set(s2)):
+                    symmetric_sets[i + j + 1] = list(set(s1).union(s2))  # fold s1 into the later set
+                    symmetric_sets[i] = None # can't delete yet - messes up indexing
+
+        #### now we delete the ``None`` placeholders left behind by merging
+        symmetric_sets = list(filter(None, symmetric_sets))
+        return symmetric_sets
+
+    def atomic_symbols(self):
+        """Returns the symbol (via ``get_symbol``) of every entry of ``self.atomic_numbers``, in order, as a list."""
+        lookup = {}
+        for z in set(self.atomic_numbers):
+            lookup[z] = get_symbol(z)
+        return [lookup[z] for z in self.atomic_numbers]
+
+    def optimize(self, inplace=True, nprocs=1, return_energy=False):
+        """
+        Optimize molecule at the GFN2-xtb level of theory (delegates to ``cctk.optimize.optimize_molecule``).
+
+        Args:
+            inplace (Bool): if True, copy the optimized geometry into ``self`` and return ``self``; otherwise return the new molecule
+            nprocs (int): number of processors to use
+            return_energy (Bool): if True, return a ``(molecule, energy)`` tuple instead of just the molecule
+
+        Returns:
+            molecule, or ``(molecule, energy)`` if ``return_energy``
+        """
+        import cctk.optimize as opt
+        assert isinstance(nprocs, int), "nprocs must be int!"
+        optimized, energy = opt.optimize_molecule(self, nprocs=nprocs, return_energy=True)
+
+        if inplace:
+            self.geometry = optimized.geometry
+            result = self
+        else:
+            result = optimized
+        if return_energy:
+            return result, energy
+        return result
+
+    def compute_energy(self, nprocs=1):
+        """
+        Returns the energy of the molecule at the GFN2-xtb level of theory, as computed by ``cctk.optimize.get_energy``.
+
+        Args:
+            nprocs (int): number of processors to use
+        """
+        import cctk.optimize as opt
+        assert isinstance(nprocs, int), "nprocs must be int!"
+
+        return opt.get_energy(self, nprocs=nprocs)
+
+    def csearch(self, nprocs=1, constraints=None, logfile=None, noncovalent=False, use_tempdir=True, gfn=2, additional_flags=None):
+        """
+        Run a ``crest`` conformational search on this molecule (delegates to ``cctk.optimize.csearch``).
+
+        Args:
+            nprocs (int): number of processors to use
+            constraints (list): atoms numbers to freeze; ``None`` or an empty list means no constraints (both are forwarded as ``None``)
+            noncovalent (bool): whether or not to use non-covalent settings
+            logfile (str): file to write ongoing ``crest`` output to
+            use_tempdir (bool): write intermediate files to hidden directory (as opposed to current directory)
+            gfn (int or ``ff``): level of theory, either 1, 2, or ``ff``
+            additional_flags (str): additional flags for command line
+
+        Returns
+            ConformationalEnsemble
+        """
+        import cctk.optimize as opt
+        assert isinstance(nprocs, int), "nprocs must be int!"
+        return opt.csearch(molecule=self, nprocs=nprocs, constraints=constraints or None, noncovalent=noncovalent, logfile=logfile, use_tempdir=use_tempdir, gfn=gfn, additional_flags=additional_flags)
+
+    def num_neighbors_by_atom(self):
+        """
+        Returns a list of the number of neighbors of each atom.
+
+        Atoms are numbered from 1, so element ``k`` of the result (0-indexed) describes atom ``k + 1``.
+        """
+        # iterate over atom numbers 1..N inclusive; ``range(N)`` would ask about atom 0 and never reach atom N
+        return [len(self.get_adjacent_atoms(i)) for i in range(1, self.num_atoms() + 1)]
+
+    def atoms_moving_in_imaginary(self, max_num=5, percent_cutoff=0.03, return_string=False):
+        """
+        Returns atoms moving in the imaginary mode (the one with the most negative frequency), ranked by how much they're moving.
+
+        Args:
+            max_num (int): how many atoms max to return
+            percent_cutoff (float): threshold for what percent of total TS movement qualifies as "movement"
+            return_string (bool): whether or not to return a formatted string report
+
+        Returns:
+            list of atom numbers (1-indexed) or string; ``None`` or ``""`` respectively if no mode has a negative frequency
+        """
+        # the TS mode is taken to be the mode with the most negative frequency
+        imaginary = 0
+        ts_mode = None
+        for mode in self.vibrational_modes:
+            if mode.frequency < imaginary:
+                imaginary = mode.frequency
+                ts_mode = mode
+
+        if ts_mode is None:
+            return "" if return_string else None
+
+        displacements = np.linalg.norm(ts_mode.displacements.view(np.ndarray), axis=-1)
+
+        atoms_ranked = np.argsort(displacements)[::-1] + 1
+        percent_movement = np.sort(displacements)[::-1] / np.sum(displacements)
+
+        return_list, pieces = [], []
+        for atom, percent in zip(atoms_ranked, percent_movement):
+            # ranked in descending order, so the first atom at/below the cutoff (or hitting the cap) ends the scan
+            if not (percent > percent_cutoff and len(return_list) < max_num):
+                break
+            return_list.append(atom)
+            pieces.append(f"{self.atom_string(atom)} ({percent:.1%})")
+
+        # return here unconditionally, so a result is produced even when every atom passes the cutoff
+        if return_string:
+            return ", ".join(pieces)
+        return return_list
+
+
+    def to_string(self):
+        """
+        Save the current molecule as a YAML string, for subsequent loading. Not human-readable.
+
+        Vibrational modes are currently not saved.
+        """
+        # name, charge, multiplicity need no encoding
+        atomic_number_encoding = numpy_to_bytes(self.atomic_numbers.view(np.ndarray))
+        geometry_encoding = numpy_to_bytes(self.geometry.view(np.ndarray))
+        bonds_encoding = numpy_to_bytes(nx.convert_matrix.to_numpy_array(self.bonds))
+
+        # a missing name is stored as the placeholder "name"; ``self`` is left untouched (serializing must not mutate)
+        name = "name" if self.name is None else self.name
+
+        cctk_version = pkg_resources.get_distribution("cctk").version
+
+        store_dict = {
+            "name": name,
+            "charge": self.charge,
+            "multiplicity": self.multiplicity,
+            "atomic_numbers": atomic_number_encoding,
+            "geometry": geometry_encoding,
+            "bonds": bonds_encoding,
+            "cctk_version": cctk_version,
+        }
+
+        return yaml.dump(store_dict)
+
+    @classmethod
+    def from_string(cls, string, check_version=True):
+        """
+        Loads a ``cctk.Molecule`` object from a YAML string with the keys written by ``to_string()``. Any failure is raised as ``ValueError``.
+
+        Arguments:
+            string (str): stringified version of the molecule
+            check_version (bool): whether version consistency should be enforced (via ``assert``, so a mismatch ends up as the ``ValueError``)
+        """
+
+        try:
+            store_dict = yaml.safe_load(string)
+
+            if check_version:
+                cctk_version = pkg_resources.get_distribution("cctk").version
+                assert cctk_version == store_dict["cctk_version"], f"Warning: the data was saved in cctk {store_dict['cctk_version']} but is being loaded in cctk {cctk_version}!"
+
+            atomic_numbers = bytes_to_numpy(store_dict["atomic_numbers"]).astype(np.int8)
+            geometry = bytes_to_numpy(store_dict["geometry"]).astype(np.float32)  # NOTE(review): casts to single precision -- confirm this matches what was stored
+            bonds = nx.convert_matrix.from_numpy_array(bytes_to_numpy(store_dict["bonds"]))
+
+            mol = cls(
+                atomic_numbers,
+                geometry,
+                bonds=bonds,
+                charge=store_dict["charge"],
+                multiplicity=store_dict["multiplicity"],
+                name=store_dict["name"],
+                checks=False, # trust nx data implicitly
+            )
+
+            return mol
+
+        except Exception as e:
+            raise ValueError(f"this stringified Molecule fails import: {e}")
+
+    def coulomb_analysis(self, atoms1, atoms2, charges):
+        """
+        Computes the net Coulomb forces between atoms ``atoms1`` and atoms ``atoms2``: the sum of ``q_i * q_j / r_ij**2`` over all pairs, times 627.509.
+        """
+        if isinstance(charges, np.ndarray):
+            charges = charges.view(cctk.OneIndexedArray)
+        elif isinstance(charges, list):
+            charges = cctk.OneIndexedArray(charges)
+
+        assert isinstance(charges, cctk.OneIndexedArray), "charges must be cctk.OneIndexedArray"
+        assert len(charges) == self.num_atoms(), "need a charge for every atom"
+        assert isinstance(atoms1, list)
+        assert isinstance(atoms2, list)
+
+        q1 = charges[atoms1]
+        q2 = charges[atoms2]
+
+        # need to convert to Bohr
+        r1 = self.geometry[atoms1] / 0.529
+        r2 = self.geometry[atoms2] / 0.529
+        # NOTE(review): distances are squared (a force-like 1/r**2 sum), yet the result is named and converted like an energy -- confirm intent
+        R = cdist(r1, r2)**2
+        Q = np.outer(q1, q2)
+
+        energy = 0
+        for i in range(len(atoms1)):
+            assert atoms1[i] not in atoms2, "lists must be non-overlapping"
+            for j in range(len(atoms2)):
+                energy += Q[i][j] / R[i][j]
+
+        return energy * 627.509 # convert to kcal/mol
diff --git a/build/lib/cctk/optimize.py b/build/lib/cctk/optimize.py
new file mode 100644
index 0000000..598119c
--- /dev/null
+++ b/build/lib/cctk/optimize.py
@@ -0,0 +1,181 @@
+"""
+Functions to assist in optimizing structures.
+"""
+
+import os, tempfile, shutil, re
+import cctk
+import subprocess as sp
+
+from enum import Enum
+
+class Methods(Enum):
+    """
+    Enum of the computational methods that ``optimize_molecule()`` and ``get_energy()`` dispatch on. For now, just GFN2-xtb is implemented.
+    """
+    GFN2_XTB = "xtb"
+
+def installed(command):
+    """Returns ``True`` if ``shutil.which`` finds ``command``, or if its name appears literally anywhere in ``PATH``."""
+    if shutil.which(command) is not None:
+        return True
+
+    # literal substring test (the command is not a regex); an unset PATH means "not installed" rather than a KeyError
+    return command in os.environ.get("PATH", "")
+
+def optimize_molecule(molecule, method=Methods.GFN2_XTB, nprocs=1, return_energy=False):
+    """
+    Dispatcher method to connect method to molecule.
+
+    Args:
+        molecule (cctk.Molecule): molecule to optimize
+        method (Methods): which method to use; only ``Methods.GFN2_XTB`` is handled
+        nprocs (int): number of cores to employ
+        return_energy (Bool): to return energy or not
+
+    Returns:
+        molecule
+        energy (optional)
+    """
+    assert isinstance(molecule, cctk.Molecule), "need a valid molecule!"
+    assert isinstance(method, Methods), "need a valid method!"
+
+    if method is Methods.GFN2_XTB:
+        return run_xtb(molecule, nprocs=nprocs, return_energy=return_energy, opt=True)
+
+def get_energy(molecule, method=Methods.GFN2_XTB, nprocs=1):
+    """
+    Dispatcher method to connect method to molecule.
+
+    Args:
+        molecule (cctk.Molecule): molecule whose energy is wanted
+        method (Methods): which method to use; only ``Methods.GFN2_XTB`` is handled
+        nprocs (int): number of cores to employ
+
+    Returns:
+        energy
+    """
+    assert isinstance(molecule, cctk.Molecule), "need a valid molecule!"
+    assert isinstance(method, Methods), "need a valid method!"
+
+    if method is Methods.GFN2_XTB:
+        _, energy = run_xtb(molecule, nprocs=nprocs, return_energy=True, opt=False)
+        return energy
+
+def run_xtb(molecule, nprocs=1, return_energy=False, opt=False):
+    """
+    Run ``xtb`` in a temporary directory. Returns the optimized molecule (``None`` unless ``opt``), plus the energy if ``return_energy``.
+    """
+    assert isinstance(molecule, cctk.Molecule), "need a valid molecule!"
+    assert isinstance(nprocs, int)
+
+    assert installed("xtb"), "xtb must be installed!"
+
+    command = f"xtb --gfn 2 --chrg {molecule.charge} --uhf {molecule.multiplicity - 1}"
+    if nprocs > 1:
+        command += f" --parallel {nprocs}"
+
+    # ``&>`` is a bash extension; ``shell=True`` runs ``sh``, which may not treat it as a redirection, so use the POSIX spelling
+    command += " xtb-in.xyz --opt tight" if opt else " xtb-in.xyz"
+    command += " > xtb-out.out 2>&1"
+
+    try:
+        # set in this process's environment, which the ``xtb`` child process inherits
+        os.environ["OMP_NUM_THREADS"] = str(nprocs)
+        os.environ["MKL_NUM_THREADS"] = str(nprocs)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            cctk.XYZFile.write_molecule_to_file(f"{tmpdir}/xtb-in.xyz", molecule)
+            sp.run(command, stdout=sp.PIPE, stderr=sp.PIPE, cwd=tmpdir, shell=True)
+
+            output_mol, energy = None, None
+            if opt:
+                output_mol = cctk.XYZFile.read_file(f"{tmpdir}/xtbopt.xyz").get_molecule()
+                energy_file = cctk.File.read_file(f"{tmpdir}/xtbopt.log")
+                fields = energy_file[1].split()
+                energy = float(fields[1])
+
+            else:
+                # stopgap solution but should work ok. XTB output files should actually be parsed eventually.
+                # ccw 4.15.21
+                output_file = cctk.File.read_file(f"{tmpdir}/xtb-out.out")
+                r = re.compile(r"total energy\s+(-?\d+\.\d+)", re.IGNORECASE)
+                for line in output_file[::-1]:
+                    m = r.search(line)
+                    if m:
+                        energy = float(m.group(1))
+                        break
+
+            if return_energy:
+                return output_mol, energy
+            else:
+                return output_mol
+    except Exception as e:
+        raise ValueError(f"Error running xtb:\n{e}") from e
+
+def csearch(use_tempdir=True, **kwargs):
+    """
+    Run a conformational search on a molecule using ``crest``.
+
+    Args:
+        use_tempdir (bool): run in a temporary directory (removed afterwards) instead of the current directory
+        molecule (cctk.Molecule): molecule of interest
+        constraints (list): list of atom numbers to constrain
+        nprocs (int): number of processors to use
+        noncovalent (Bool): whether or not to use non-covalent settings
+        logfile (str): file to write ongoing ``crest`` output to
+        gfn (int or ``ff``): level of theory, either 1, 2, or ``ff``
+        additional_flags (str): flags to pass to command line
+
+    Returns:
+        cctk.ConformationalEnsemble
+    """
+    assert installed("crest"), "crest must be installed!"
+
+    try:
+        if use_tempdir:
+            with tempfile.TemporaryDirectory() as tmpdir:
+                return _do_csearch(directory=tmpdir, **kwargs)
+        else:
+            return _do_csearch(directory=os.getcwd(), **kwargs)
+    except Exception as e:
+        # this function wraps ``crest``, so the message names it (it used to blame ``xtb``)
+        raise ValueError(f"Error running crest:\n{e}") from e
+
+def _do_csearch(molecule, directory, gfn=2, nprocs=1, logfile=None, noncovalent=False, constraints=None, additional_flags=None):
+    """Writes ``molecule`` into ``directory``, runs ``crest`` there, and returns the ensemble read from ``crest_conformers.xyz``."""
+    assert isinstance(molecule, cctk.Molecule), "need a valid molecule!"
+    assert isinstance(nprocs, int)
+    # ``logfile`` is optional: ``None`` (the default) sends the output to a pipe instead of a file
+    assert logfile is None or isinstance(logfile, str)
+    assert gfn in [2, 1, "ff"], "invalid value for ``gfn``!"
+
+    cctk.XYZFile.write_molecule_to_file(f"{directory}/xtb-in.xyz", molecule)
+
+    nci = "-nci" if noncovalent else ""
+
+    # an empty list means "no constraints": only a non-empty list triggers the preliminary ``--constrain`` run
+    if constraints:
+        assert isinstance(constraints, list)
+        assert all(isinstance(n, int) for n in constraints)
+        # preliminary run; presumably this is what produces the ``.xcontrol.sample`` file passed to ``-cinp`` below
+        command = f"crest xtb-in.xyz --constrain {','.join([str(c) for c in constraints])}"
+        result = sp.run(command, stdout=sp.PIPE, stderr=sp.PIPE, cwd=directory, shell=True)
+        result.check_returncode()
+        command = f"crest xtb-in.xyz --gfn{gfn} --chrg {molecule.charge} -cinp .xcontrol.sample --uhf {molecule.multiplicity - 1} -T {nprocs} {nci}"
+    else:
+        command = f"crest xtb-in.xyz --gfn{gfn} --chrg {molecule.charge} --uhf {molecule.multiplicity - 1} -T {nprocs} {nci}"
+
+    if additional_flags is not None:
+        command = command + " " + additional_flags
+
+    if logfile:
+        with open(logfile, "w") as f:
+            result = sp.run(command, stdout=f, stderr=f, cwd=directory, shell=True)
+    else:
+        result = sp.run(command, stdout=sp.PIPE, stderr=sp.PIPE, cwd=directory, shell=True)
+    result.check_returncode()
+
+    ensemble = cctk.XYZFile.read_ensemble(f"{directory}/crest_conformers.xyz").ensemble
+    return ensemble
+
+
+
diff --git a/build/lib/cctk/orca_file.py b/build/lib/cctk/orca_file.py
new file mode 100644
index 0000000..0c1e62e
--- /dev/null
+++ b/build/lib/cctk/orca_file.py
@@ -0,0 +1,375 @@
+import re
+import numpy as np
+
+from enum import Enum
+
+from cctk import File, Molecule, ConformationalEnsemble
+from cctk.helper_functions import get_symbol, get_corrected_free_energy
+
+import cctk.parse_orca as parse
+
+class OrcaJobType(Enum):
+    """
+    Class representing allowed Orca job types. Not an exhaustive list, but should be fairly comprehensive.
+
+    The value should be the Orca keyword: ``OrcaFile._assign_job_types()`` searches the header for each value, ignoring case.
+
+    All jobs have type ``SP`` by default (``_assign_job_types()`` adds it whenever it was not matched).
+    """
+
+    SP = "sp"
+    """
+    Single point energy calculation.
+    """
+
+    OPT = "opt"
+    """
+    Geometry optimization.
+    """
+
+    FREQ = "freq"
+    """
+    Hessian calculation.
+    """
+
+    NMR = "nmr"
+    """
+    NMR shielding prediction.
+    """
+
+#### Properties that ``OrcaFile.check_has_properties()`` requires for each job type, keyed by the ``OrcaJobType`` value.
+EXPECTED_PROPERTIES = {
+    "sp": ["energy", "scf_iterations",],
+#    "opt": ["rms_gradient", "rms_step", "max_gradient", "max_step"],
+    "opt": [],
+    "freq": ["gibbs_free_energy", "enthalpy", "frequencies", "temperature"],
+    "nmr": ["isotropic_shielding",],
+}
+
+
+class OrcaFile(File):
+    """
+    Generic class for all Orca `.inp` and `.out` files.
+
+    Attributes:
+        ensemble (ConformationalEnsemble): `ConformationalEnsemble` instance
+        job_types (list): list of ``OrcaJobType`` instances
+        header (str): keyword line or lines
+        variables (dict): list of variables to specify (e.g. ``{"maxcore": 2000}``).
+        blocks (dict): list of blocks to change specific settings
+            In general, the key should be the block name and the value should be a list of desired lines.
+            For instance, configuring a time-dependent DFT job might look like ``{"tddft": ["maxdim 5", "nroots 50"]}``
+        successful_terminations (int): number of successful terminations
+        elapsed_time (float): total time for job in seconds
+    """
+
+    def __init__(self, job_types, ensemble=None,  header=None, variables=None, blocks=None):
+        """Validates ``job_types`` and stores the inputs; other arguments that are empty or of the wrong type are replaced by empty defaults."""
+        if job_types is None:
+            raise ValueError("need job types for new Orca file")
+        # check entries one at a time so the error can name the offending one
+        for job in job_types:
+            if not isinstance(job, OrcaJobType):
+                raise TypeError(f"invalid job type {job}")
+        self.job_types = job_types
+
+        if ensemble and isinstance(ensemble, ConformationalEnsemble):
+            self.ensemble = ensemble
+        else:
+            self.ensemble = ConformationalEnsemble()
+
+        self.header = header if (header and isinstance(header, str)) else None
+
+        if blocks and isinstance(blocks, dict):
+            for lines in list(blocks.values()):
+                assert isinstance(lines, list)
+            self.blocks = blocks
+        else:
+            self.blocks = {}
+
+        self.variables = variables if (variables and isinstance(variables, dict)) else {}
+
+        # defaults for the fields ``read_file()`` fills in, so ``check_has_properties()`` also works on a hand-built file
+        self.successful_terminations = 0
+        self.elapsed_time = 0
+
+    @classmethod
+    def read_file(cls, filename):
+        """
+        Reads an Orca output file. Returns an ``OrcaFile`` (a list of them if the file holds several jobs), or ``None`` if a job has no energies.
+        """
+        if re.search("inp$", filename):
+            return cls._read_inp_file(filename)
+
+        multiple_lines = parse.split_multiple_inputs(filename)
+        files = []
+
+        for lines in multiple_lines:
+            input_lines = parse.extract_input_file(lines)
+            header = parse.read_header(input_lines)
+            job_types = cls._assign_job_types(header)
+            variables, blocks = parse.read_blocks_and_variables(input_lines)
+
+            success = 0
+            elapsed_time = 0
+            for line in lines:
+                if line.strip().startswith("****ORCA TERMINATED NORMALLY****"):
+                    success += 1
+                elif line.startswith("TOTAL RUN TIME"):
+                    fields = line.split()
+                    assert len(fields) == 13, f"unexpected number of fields on elapsed time line:\n{line}"
+                    days = float(fields[3])
+                    hours = float(fields[5])
+                    minutes = float(fields[7])
+                    seconds = float(fields[9])
+                    elapsed_time = days * 86400 + hours * 3600 + minutes * 60 + seconds
+
+            energies, iters = parse.read_energies(lines)
+            if len(energies) == 0:
+                return None
+
+            atomic_numbers, geometries = parse.read_geometries(lines, num_to_find=len(energies))
+            assert len(geometries) >= len(energies), "can't have an energy without a geometry (cf. pigeonhole principle)"
+
+            charge = lines.find_parameter("xyz", 6, 4)[0]
+            multip = lines.find_parameter("xyz", 6, 5)[0]
+
+            #### TODO
+            # detect Mayer bond orders
+
+            f = OrcaFile(job_types, header=header, variables=variables, blocks=blocks)
+            f.elapsed_time = elapsed_time
+            f.successful_terminations = success
+
+            molecules = [None] * len(geometries)
+            properties = [{} for _ in range(len(geometries))]
+            for idx, geom in enumerate(geometries):
+                molecules[idx] = Molecule(atomic_numbers, geom, charge=charge, multiplicity=multip, bonds=None)
+                if idx < len(energies):
+                    properties[idx]["energy"] = energies[idx]
+                properties[idx]["filename"] = filename
+                properties[idx]["iteration"] = idx
+                if idx < len(iters):  # geometries may outnumber energies (see the assert above), so guard this lookup too
+                    properties[idx]["scf_iterations"] = iters[idx]
+
+            if multip > 1:
+                s2 = lines.find_parameter("Expectation value of", 6, 5)
+                for idx, spin_contam in enumerate(s2):
+                    properties[idx]["S**2"] = spin_contam
+
+            if OrcaJobType.OPT in job_types:
+                rms_grad, max_grad, rms_step, max_step = parse.read_gradients(lines, len(properties))
+                for idx in range(len(rms_grad)):
+                    properties[idx]["rms_gradient"] = rms_grad[idx]
+                    if idx < len(max_grad):
+                        properties[idx]["max_gradient"] = max_grad[idx]
+                    if idx < len(rms_step):
+                        properties[idx]["rms_step"] = rms_step[idx]
+                    if idx < len(max_step):
+                        properties[idx]["max_step"] = max_step[idx]
+
+            if OrcaJobType.FREQ in job_types:
+                properties[-1]["frequencies"] = sorted(parse.read_freqs(lines))
+
+                enthalpies = lines.find_parameter("Total Enthalpy", expected_length=5, which_field=3)
+                if len(enthalpies) == 1:
+                    properties[-1]["enthalpy"] = enthalpies[0]
+                elif len(enthalpies) > 1:
+                    raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}")
+
+                gibbs = lines.find_parameter("Final Gibbs free enthalpy", expected_length=7, which_field=5)
+                if len(gibbs) == 1:
+                    properties[-1]["gibbs_free_energy"] = gibbs[0]
+                elif len(gibbs) > 1:
+                    raise ValueError(f"unexpected # of gibbs free energies found!\ngibbs free energies = {gibbs}")
+
+                temperature = lines.find_parameter("Temperature", expected_length=4, which_field=2)
+                if len(temperature) == 1 and len(gibbs) > 0:
+                    properties[-1]["temperature"] = temperature[0]
+                    corrected_free_energy = get_corrected_free_energy(gibbs[0], properties[-1]["frequencies"],
+                                                                      frequency_cutoff=100.0, temperature=temperature[0])
+                    properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy)
+
+            if OrcaJobType.NMR in job_types:
+                nmr_shifts = parse.read_nmr_shifts(lines, molecules[0].num_atoms())
+                if nmr_shifts is not None:
+                    properties[-1]["isotropic_shielding"] = nmr_shifts
+
+            try:
+                charges = parse.read_mulliken_charges(lines)
+                assert len(charges) == len(atomic_numbers)
+                properties[-1]["mulliken_charges"] = charges
+            except Exception as e:
+                pass
+
+            try:
+                charges = parse.read_loewdin_charges(lines)
+                assert len(charges) == len(atomic_numbers)
+                properties[-1]["lowdin_charges"] = charges
+            except Exception as e:
+                pass
+
+            try:
+                dipole = lines.find_parameter(r"Magnitude \(Debye\)", 4, 3)
+                properties[-1]["dipole_moment"] = dipole[0]
+            except Exception as e:
+                pass
+
+            for mol, prop in zip(molecules, properties):
+                f.ensemble.add_molecule(mol, properties=prop)
+
+            f.check_has_properties()
+            files.append(f)
+
+        if len(files) == 1:
+            return files[0]
+        else:
+            return files
+
+    @classmethod
+    def _read_inp_file(cls, filename):
+        print("reading ``.inp`` files is not currently supported :(")  # placeholder: ``filename`` is never opened
+        return None
+
+    def write_file(self, filename, molecule=None, header=None, variables=None, blocks=None):
+        """
+        Write a ``.inp`` file, using object attributes.
+        Any of ``header``, ``variables``, or ``blocks`` left as ``None`` is taken from the corresponding attribute of ``self``.
+
+        Args:
+            filename (str): path to the new file
+            molecule (int): which molecule to use, as an index into ``self.ensemble.molecules``.
+                Default is -1 (e.g. the last molecule), but positive integers will select from self.ensemble.molecules (0-indexed).
+                A ``Molecule`` object can also be passed, in which case that molecule will be written to the file.
+            header (str): header for new file
+            variables (dict): variables for new file
+            blocks (dict): blocks for new file
+        """
+        if isinstance(molecule, Molecule):
+            chosen = molecule
+        else:
+            index = -1 if molecule is None else molecule
+            chosen = self.ensemble.molecules[index]
+
+        # anything the caller left unspecified falls back to the stored attribute
+        header = self.header if header is None else header
+        variables = self.variables if variables is None else variables
+        blocks = self.blocks if blocks is None else blocks
+
+        self.write_molecule_to_file(filename, chosen, header, variables, blocks)
+
+    @classmethod
+    def write_molecule_to_file(cls, filename, molecule, header, variables=None, blocks=None, print_symbol=False):
+        """
+        Write an ``.inp`` file using the given molecule.
+
+        Args:
+            filename (str): path to the new file
+            molecule (Molecule): which molecule to use -- a ``Molecule`` object.
+            header (str): header for new file
+            variables (dict): each item is written as a ``%key value`` line
+            blocks (dict): each item is written as ``%key``, one tab-indented line per list entry, then ``end``
+            print_symbol (Bool): if atomic symbols should be printed instead of atomic numbers
+        """
+        assert isinstance(molecule, Molecule), "need a valid molecule to write a file!"
+        assert isinstance(header, str), "can't write a file without a header"
+
+        text = f"{header.strip()}\n"
+
+        if variables is not None:
+            assert isinstance(variables, dict), "variables must be a dictionary"
+            for k, v in variables.items():
+                text += f"%{k} {v}\n"
+
+        if blocks is not None:
+            assert isinstance(blocks, dict), "blocks must be a dictionary"
+            for k, v in blocks.items():
+                text += f"%{k}\n"
+                for line in v:
+                    text += f"\t{line}\n"
+                text += "end\n"
+
+        text +="\n"
+        text += f"* xyz {int(molecule.charge)} {int(molecule.multiplicity)}\n"
+        for index, Z in enumerate(molecule.atomic_numbers, start=1):
+            line = molecule.get_vector(index)
+            if print_symbol:
+                Z = get_symbol(Z)
+                text += f"{Z:>2}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n"
+            else:
+                text += f"{Z:2d}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n"
+
+        text += "*\n\n"
+
+        #### write the file
+        super().write_file(filename, text)
+
+    def get_molecule(self, num=None):
+        """
+        Returns the last molecule (from an optimization job or other multi-molecule jobs) or the only molecule (from other jobs).
+
+        If ``num`` is specified, it is used as-is to index ``self.ensemble.molecule_list()``. NOTE(review): for a plain Python list that is 0-indexed (``get_molecule(3)`` is the 4th molecule), not 1-indexed as this docstring used to say -- confirm which is intended.
+        """
+        # ``None`` (the default, which some callers also pass explicitly) selects the last molecule
+        if num is None:
+            num = -1
+
+        if not isinstance(num, int):
+            raise TypeError("num must be int")
+
+        return self.ensemble.molecule_list()[num]
+
+    def num_imaginaries(self):
+        """
+        Returns the number of imaginary frequencies, i.e. the length of ``self.imaginaries()``.
+        """
+        return len(self.imaginaries())
+
+    def imaginaries(self):
+        """
+        Returns the negative entries of ``self.ensemble[-1:,"frequencies"]``, each converted with ``int()``.
+
+        An empty list is returned for non-``FREQ`` jobs or when no non-empty frequency list is stored.
+        """
+        if OrcaJobType.FREQ not in self.job_types:
+            return list()
+        freqs = self.ensemble[-1:,"frequencies"]
+        if not isinstance(freqs, list) or len(freqs) == 0:
+            return list()
+        return [int(freq) for freq in np.array(freqs)[np.array(freqs) < 0]]
+
+
+    @classmethod
+    def _assign_job_types(cls, header):
+        """
+        Determines the job types requested by an Orca header.
+
+        A job type is included when its value, preceded by a space, occurs in the header (ignoring case);
+        ``OrcaJobType.SP`` is always included.
+
+        Args:
+            header (str): Orca header
+
+        Returns:
+            list of ``OrcaJobType`` objects
+        """
+        matched = [m for m in OrcaJobType.__members__.values() if re.search(f" {m.value}", str(header), re.IGNORECASE)]
+        if OrcaJobType.SP not in matched:
+            matched.append(OrcaJobType.SP)
+        return matched
+
+    def check_has_properties(self):
+        """
+        Raises ``ValueError`` if a property listed in ``EXPECTED_PROPERTIES`` for one of ``self.job_types`` is missing.
+
+        Only the last molecule in ``self.ensemble`` is checked, and nothing is checked unless ``self.successful_terminations`` is positive.
+        """
+        if not self.successful_terminations > 0:
+            return
+
+        for job_type in self.job_types:
+            for prop in EXPECTED_PROPERTIES[job_type.value]:
+                if not self.ensemble.has_property(-1, prop):
+                    raise ValueError(f"expected property {prop} for job type {job_type}, but it's not there!")
+
+
diff --git a/build/lib/cctk/parse_gaussian.py b/build/lib/cctk/parse_gaussian.py
new file mode 100644
index 0000000..3cd9eb8
--- /dev/null
+++ b/build/lib/cctk/parse_gaussian.py
@@ -0,0 +1,768 @@
+import numpy as np
+import re
+import ahocorasick
+
+import cctk
+from cctk.helper_functions import get_corrected_free_energy
+
+"""
+Functions to help with parsing Gaussian files
+"""
+
+def read_file_fast(file_text, filename, link1idx, max_len=20000, extended_opt_info=False, fail_silently=True):
+    """Scrape ``file_text`` (one chunk of Gaussian output) into a ``cctk.GaussianFile`` holding one molecule per parsed geometry.
+    ``filename`` and ``link1idx`` are only stored as molecule properties; ``max_len`` caps the newlines gathered per block; ``extended_opt_info`` adds max/gradient/internal-force properties for OPT jobs.
+    Returns the file, or ``None`` when ``fail_silently`` is set and ``check_has_properties()`` raises. Raises ``ValueError`` if no title block is found."""
+    #### "Make your bottleneck routines fast, everything else clear" - M. Scott Shell, UCSB. Welcome to the fast part!
+    #### Here we identify all the lines we're going to scrape (a trailing #N marks that entry's index in the list)
+    words = [
+        "SCF Done",
+        "Entering Link 1",
+        "Normal termination",
+        "Elapsed time",
+        "Multiplicity",
+        "RMS     Force", #5
+        "RMS     Displacement",
+        "Maximum Force",
+        "Maximum Displacement",
+        "Cartesian Forces",
+        "Internal  Forces", #10
+        "Predicted change in Energy",
+        "thermal Enthalpies",
+        "thermal Free Energies",
+        "Frequencies",
+        "Temperature", #15
+        "Isotropic",
+        "EUMP2",
+        "EUMP3",
+        "UMP4(SDTQ)",
+        "Wavefunction amplitudes converged", #20
+    ]
+
+    #### And here are the blocks of text
+    #### format: [start (a string, or a list of alternative strings), stop, maximum number of matches to keep]
+
+    blocks = [
+        ["#p", "----", 1],
+        ["/99;", "Symbolic Z-matrix", 1],
+        ["The following ModRedundant input section", "\n \n", 1],
+        [
+            ["Input orientation", "Standard orientation", "Cartesian Coordinates"],
+            "Leave Link  202",
+            1000,
+        ],
+        ["Wallingford", "#p", 1],
+        ["Initial Parameters", "! A", 1], #5
+        ["Total nuclear spin-spin coupling J", "Leave Link", 1],
+        ["Forces (Hartrees/Bohr)", "Cartesian Forces", 1],
+        ["Hirshfeld charges, spin densities, dipoles, and CM5 charges", " Hirshfeld charges", 1],
+        ["Mulliken charges", "Sum of Mulliken charges", 1],
+        ["Electronic spatial extent", "Quadrupole moment", 1], #10
+        ["normal coordinates", "Thermochemistry", 1],
+        ["Isotropic", "Eigenvalues", 1000],
+    ]
+
+    word_matches = [[] for _ in words]
+    block_matches = [[] for _ in blocks]
+
+    A = ahocorasick.Automaton()
+
+    for idx, word in enumerate(words):
+        A.add_word(word, idx)  # plain int payload = index into ``words``
+
+    for idx, b in enumerate(blocks):
+        if isinstance(b[0], list):
+            for start in b[0]:
+                A.add_word(start, ("start", idx))  # tuple payload = index into ``blocks``
+        else:
+            A.add_word(b[0], ("start", idx))
+
+    #### perform search
+    A.make_automaton()
+    found_words = A.iter(file_text)
+
+    #### now, we have to expand our one-character matches to whole lines/blocks
+    #### this is the slowest part (``position`` is used below as the index of the last character of the hit)
+    for position, idx in found_words:
+        if isinstance(idx, int):
+            stepsize = 10
+
+            match = file_text[position]
+            i = position + 1
+            while match[-1-stepsize:].find("\n") < 0:  # NOTE(review): never ends if no "\n" follows the hit before the end of the text
+                match = match + file_text[i:i+stepsize]
+                i += stepsize
+
+            match = match.split("\n")[0]
+
+            j = position
+            while match[:stepsize].find("\n") < 0:  # NOTE(review): assumes a "\n" precedes the hit; j - stepsize can go negative otherwise
+                match = file_text[j-stepsize:j] + match
+                j += -1 * stepsize
+
+            match = match.split("\n")[-1]
+            word_matches[idx].append(match)
+
+        elif isinstance(idx, tuple):
+            idx = idx[1]
+            if len(block_matches[idx]) >= blocks[idx][2]:
+                continue
+
+            match = ""
+            i = position - len(blocks[idx][0]) + 1  # NOTE(review): for idx 3 this is the length of the list of alternatives (3), not of the matched string
+            end = blocks[idx][1]
+
+            stepsize = 1000
+            file_len = len(file_text)
+
+            #### we're looking for the end, but we take steps with length ``stepsize`` to go faster
+            while match[-1 * (stepsize + len(end)):-1].count(end) == 0 and match.count("\n") < max_len:
+                match = match + file_text[i:i+stepsize]
+                i += stepsize
+
+                if i > file_len:
+                    break
+
+            match = match.split(end)[0]
+
+            # special geometry handling :/
+            if idx == 3:
+                # ccw 10.8.2021 - changed "==" to "<=" to prevent issues where # geoms would get stuck.
+                # can't remember quite why this was needed. hopefully it is ok this way. tests pass.
+                if len(block_matches[3]) <= len(word_matches[0]):  # word_matches[0] = "SCF Done" lines seen so far
+                    block_matches[3].append(match)
+                else:
+                    block_matches[3][-1] = match  # otherwise the newest geometry replaces the previous one
+
+            else:
+                block_matches[idx].append(match)
+
+    del file_text # here, have your RAM back!
+
+    if len(block_matches[1]) == 0:
+        raise ValueError(f"Can't find a title block - something is wrong with {filename}! (cctk requires Gaussian output files to have been run in ``#p`` verbose mode)")
+
+    #### and from here, we're off to the races!
+    n, g = parse_geometry(block_matches[3])
+    title, link0, route_card, footer, job_types = parse_header_footer(block_matches[0], block_matches[1], block_matches[2], block_matches[4])
+    energies, scf_iterations = parse_energies(word_matches[0])
+    success, elapsed_time = parse_success_elapsed_time(word_matches[2], word_matches[3])
+    charge, multip = parse_charge_multiplicity(word_matches[4])
+    bonds = parse_bonds(block_matches[5])
+
+    # post-HF methods give weird energies
+    if re.search("mp2", route_card, re.IGNORECASE):
+        energies = parse_mp2_energies(word_matches[17])
+    elif re.search("mp3", route_card, re.IGNORECASE):
+        energies = parse_mp3_energies(word_matches[18])
+    elif re.search("mp4", route_card, re.IGNORECASE):
+        energies = parse_mp4_energies(word_matches[19])
+    elif re.search("ccsd", route_card, re.IGNORECASE):
+        energies = parse_cc_energies(word_matches[20])
+    elif re.search("cisd", route_card, re.IGNORECASE):
+        energies = parse_ci_energies(word_matches[20])
+
+    f = cctk.GaussianFile(job_types=job_types, route_card=route_card, link0=link0, footer=footer, success=success, elapsed_time=elapsed_time, title=title)
+
+    molecules = [None] * len(g)
+    properties = [{} for _ in range(len(g))]
+    for idx, geom in enumerate(g):
+        molecules[idx] = cctk.Molecule(n[0], geom, charge=charge, multiplicity=multip, bonds=bonds, checks=False)  # every geometry reuses the atomic numbers of the first block
+        if idx < len(energies):
+            properties[idx]["energy"] = energies[idx]
+        if idx < len(scf_iterations):
+            properties[idx]["scf_iterations"] = scf_iterations[idx]
+        properties[idx]["link1_idx"] = link1idx
+        properties[idx]["filename"] = filename
+        properties[idx]["iteration"] = idx
+
+    if cctk.GaussianJobType.OPT in job_types:
+        rms_forces = extract_parameter(word_matches[5], 2)
+        rms_disp = extract_parameter(word_matches[6], 2)
+
+        if extended_opt_info:
+            max_forces = extract_parameter(word_matches[7], 2)
+            max_disp = extract_parameter(word_matches[8], 2)
+            rms_grad = extract_parameter(word_matches[9], 5)
+            max_grad = extract_parameter(word_matches[9], 3)
+            rms_int = extract_parameter(word_matches[10], 5)
+            max_int = extract_parameter(word_matches[10], 3)
+            delta_e = extract_parameter(word_matches[11], 3, cast_to_float=False)
+
+        # ccw 10.8.2021 - ad hoc correction to Gaussian. unsure what's going on here. sometimes len(rms_forces) > len(g)
+        force_property_index = min(len(g), len(rms_forces))
+
+        for idx in range(force_property_index):
+            properties[idx]["rms_force"] = rms_forces[idx]
+            properties[idx]["rms_displacement"] = rms_disp[idx]
+
+            if extended_opt_info:
+                if idx < len(max_forces):
+                    properties[idx]["max_force"] = max_forces[idx]
+
+                if idx < len(max_disp):
+                    properties[idx]["max_displacement"] = max_disp[idx]
+
+                if idx < len(max_grad):
+                    properties[idx]["max_gradient"] = max_grad[idx]
+
+                if idx < len(rms_grad):
+                    properties[idx]["rms_gradient"] = rms_grad[idx]
+
+                if idx < len(max_int):
+                    properties[idx]["max_internal_force"] = max_int[idx]
+
+                if idx < len(rms_int):
+                    properties[idx]["rms_internal_force"] = rms_int[idx]
+
+                if idx < len(delta_e):
+                    change_in_energy = re.sub(r"Energy=", "", delta_e[idx])
+                    properties[idx]["predicted_change_in_energy"] = float(change_in_energy.replace('D', 'E'))
+
+    if cctk.GaussianJobType.FREQ in job_types and len(molecules):
+        enthalpies = extract_parameter(word_matches[12], 6)
+        if len(enthalpies) == 1:
+            properties[-1]["enthalpy"] = enthalpies[0]
+        elif len(enthalpies) > 1:
+            raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}")
+
+        gibbs_vals = extract_parameter(word_matches[13], 7)
+        if len(gibbs_vals) == 1:
+            properties[-1]["gibbs_free_energy"] = gibbs_vals[0]
+        elif len(gibbs_vals) > 1:
+            raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}")
+
+        vibrational_modes = parse_modes(block_matches[11], num_atoms=molecules[-1].num_atoms(), hpmodes=re.search("hpmodes", route_card))
+        molecules[-1].vibrational_modes = vibrational_modes
+
+        frequencies = []
+        try:
+            frequencies += extract_parameter(word_matches[14], 2)
+
+            # very small molecules might only have 1 or 2 freqs. NOTE(review): with cast_to_float, extract_parameter returns 0 for a missing field instead of raising, so short lines add 0 entries here
+            try:
+                frequencies += extract_parameter(word_matches[14], 3)
+            except Exception as e:
+                pass
+            try:
+                frequencies += extract_parameter(word_matches[14], 4)
+            except Exception as e:
+                pass
+
+            properties[-1]["frequencies"] = sorted(frequencies)
+        except Exception as e:
+            raise ValueError("error finding frequencies")
+
+        temperature = extract_parameter(word_matches[15], 1)
+        if len(temperature) == 1:
+            properties[-1]["temperature"] = temperature[0]
+            corrected_free_energy = get_corrected_free_energy(gibbs_vals[0], frequencies, frequency_cutoff=100.0, temperature=temperature[0])  # NOTE(review): IndexError if gibbs_vals is empty
+            properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy)
+
+    if cctk.GaussianJobType.NMR in job_types:  # NOTE(review): unlike the FREQ/FORCE/POP branches there is no len(molecules) guard here
+        nmr_shifts, shielding_tensors = read_nmr_shifts(block_matches[12], molecules[0].num_atoms())
+        if nmr_shifts is not None:
+            properties[-1]["isotropic_shielding"] = nmr_shifts.view(cctk.OneIndexedArray)
+            properties[-1]["shielding_tensors"] = shielding_tensors
+
+        if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search("nmr=spinspin", f.route_card,flags=re.IGNORECASE):
+            couplings = read_j_couplings(block_matches[6], molecules[0].num_atoms())
+            if couplings is not None:
+                properties[-1]["j_couplings"] = couplings
+
+    if cctk.GaussianJobType.FORCE in job_types and len(molecules):
+        assert len(molecules) == 1, "force jobs should not be combined with optimizations!"
+        force_block = block_matches[7]
+        if len(force_block) == 0:
+            raise ValueError("no forces to parse!")
+        forces = parse_forces(force_block)
+        properties[0]["forces"] = forces
+
+    if cctk.GaussianJobType.POP in job_types and len(molecules):
+        if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card) and len(block_matches[8]) > 0:  # NOTE(review): ``and`` binds tighter than ``or``, so the length check only guards the cm5 test
+            charges, spins = parse_hirshfeld(block_matches[8])
+            properties[-1]["hirshfeld_charges"] = charges
+            properties[-1]["hirshfeld_spins"] = spins
+
+    if len(molecules):
+        try:
+            charges, dipole, dipole_v = parse_charges_dipole(block_matches[9], block_matches[10])
+            properties[-1]["mulliken_charges"] = charges
+            properties[-1]["dipole_moment"] = dipole
+            properties[-1]["dipole_vector"] = dipole_v
+        except Exception as e:
+            pass  # best effort: Mulliken charges and dipole are simply left out if anything above fails
+
+    for mol, prop in zip(molecules, properties):
+        f.ensemble.add_molecule(mol, properties=prop)
+
+    if fail_silently:
+        try:
+            f.check_has_properties()
+        except Exception as e:
+            # silently exclude this file
+            return None
+    else:
+        f.check_has_properties()
+
+    return f
+
+
+def parse_geometry(blocks):
+    """
+    Extracts atomic numbers and coordinates from each geometry block (its first 4 and last 2 lines are skipped).
+
+    Returns ``(nums, geoms)``: for every block, a list of ints and an equally long list of ``[x, y, z]`` floats.
+    A six-field row that fails numeric conversion is printed and skipped as a whole, so both lists stay aligned.
+    """
+    # the last flag covers jobs where the normal ending flags get cut off (ccw 6.10.22)
+    stop_flags = re.compile("Distance|Rotational constants|One-electron integrals computed using")
+    nums, geoms = [], []
+    for block in blocks:
+        current_nums, current_geoms = [], []
+        for line in block.split("\n")[4:-2]:
+            if stop_flags.search(line):
+                break
+            pieces = list(filter(None, line.split(" ")))
+            if len(pieces) != 6:
+                continue
+            try:
+                number, xyz = int(pieces[1]), [float(piece) for piece in pieces[3:6]]
+            except ValueError:
+                print(f"{block}\n\n\n\n{line}")
+                continue
+            current_nums.append(number)
+            current_geoms.append(xyz)
+        nums.append(current_nums)
+        geoms.append(current_geoms)
+    return nums, geoms
+
+def parse_header_footer(route_block, title_block, footer_block, link0_block):
+    """
+    Builds the title, Link 0 dict, route card, footer and job types from the scraped header blocks.
+
+    Each argument is a list of matched text blocks; only element 0 of each is read, and
+    ``footer_block`` may be empty (``footer`` is then ``None``).
+
+    Returns:
+        (title, link0, route_card, footer, job_types); ``cctk.GaussianJobType.SP`` is always included
+    """
+    # the title sits 2 lines before 'Symbolic Z-matrix', i.e. third from the end of the block
+    title = title_block[0].split("\n")[-3].strip()
+    route_card = "".join(line.lstrip() for line in route_block[0].split("\n"))
+
+    footer = None
+    if len(footer_block) > 0:
+        body = footer_block[0].split("\n")[1:]  # get rid of the first line
+        footer = "\n".join(" ".join(filter(None, line.split(" "))) for line in body)
+
+    link0 = dict()
+    for line in link0_block[0].split("\n"):
+        if line.startswith(" %"):
+            # partition keeps any "=" inside the value and tolerates commands written without "="
+            key, _, value = line[2:].partition("=")
+            link0[key] = value
+    job_types = [m for m in cctk.GaussianJobType.__members__.values() if re.search(f" {m.value}", str(route_card), re.IGNORECASE)]
+    if cctk.GaussianJobType.SP not in job_types:
+        job_types.append(cctk.GaussianJobType.SP)
+    return title, link0, route_card, footer, job_types
+
+def parse_energies(scf_done_block):
+    """
+    Reads the energy (5th space-separated field) and iteration count (8th field) of each "SCF Done" line.
+    Returns ``(energies, iters)`` as parallel lists of floats and ints.
+    """
+    pairs = []
+    for line in scf_done_block:
+        tokens = [tok for tok in line.split(" ") if tok]
+        pairs.append((float(tokens[4]), int(tokens[7])))
+    return [energy for energy, _ in pairs], [count for _, count in pairs]
+
+def parse_success_elapsed_time(success_lines, time_lines):
+    """Returns ``(len(success_lines), seconds)``, adding up fields 3/5/7/9 of each time line as days/hours/minutes/seconds."""
+    successes, total = len(success_lines), 0
+    for tokens in ([tok for tok in line.split(" ") if tok] for line in time_lines):
+        days, hours, minutes = (int(tokens[k]) for k in (2, 4, 6))
+        total += days * 86400 + hours * 3600 + minutes * 60 + float(tokens[8])
+    return successes, total
+
+def parse_charge_multiplicity(charge_line):
+    tokens = [tok for tok in charge_line[0].replace("=", " ").split(" ") if tok]  # only the first matched line is read
+    return int(tokens[1]), int(tokens[3])  # (charge, multiplicity)
+
+def parse_bonds(bonding_block):
+    """Returns ``[atom1, atom2]`` int pairs from the "! R" rows of the first block, or ``None`` if there is no block."""
+    if len(bonding_block) == 0:
+        return None
+    bond_array = []
+    for line in bonding_block[0].split("\n"):
+        if re.search(r"! R", line):
+            # the third field is stripped of "R", "(" and ")" so that e.g. R(1,2) leaves "1,2"
+            atoms = list(filter(None, line.split(" ")))[2].replace("R", "").replace("(", "").replace(")", "").split(",")
+            try:
+                bond_array.append([int(atoms[0]), int(atoms[1])])
+            except (ValueError, IndexError) as e:
+                raise ValueError(f"error parsing line - can't extract atoms!\n{line}\n{e}") from e
+    return bond_array
+
+def split_link1_to_text(filename):
+    """Returns the file's text cut after each line containing "Entering Link 1", without the first chunk (just a few lines)."""
+    chunks, pending = [], []
+    with open(filename, "r") as handle:
+        for line in handle:
+            pending.append(line)
+            if "Entering Link 1" in line:
+                chunks.append("".join(pending))
+                pending = []
+    chunks.append("".join(pending))
+    return chunks[1:]
+
+def extract_parameter(lines, position, cast_to_float=True):
+    """
+    Returns the space-separated field at index ``position`` of every line (as a float by default).
+    """
+    def field(line):
+        tokens = [tok for tok in line.split(" ") if tok]
+        if not cast_to_float:
+            return tokens[position]
+        try:
+            return float(tokens[position])
+        except Exception:
+            return 0  #### a missing field or text such as "******" (seen for RMS Force) becomes 0
+    return [field(line) for line in lines]
+
+def parse_forces(force_block):
+    """
+    Reads force vectors from the first element of ``force_block``.
+
+    The first two lines are skipped and only rows with exactly five space-separated
+    fields contribute; fields 3-5 of such a row become one ``[x, y, z]`` entry.
+
+    Returns:
+        ``cctk.OneIndexedArray`` built from the collected rows
+    """
+    forces = []
+    for line in force_block[0].split("\n")[2:]:
+        fields = [field for field in re.split(" +", line) if field]
+        # rows of any other width are ignored
+        if len(fields) == 5:
+            forces.append([float(value) for value in fields[2:5]])
+    return cctk.OneIndexedArray(forces)
+
+def parse_charges_dipole(mulliken_block, dipole_block):
+    """
+    Reads Mulliken charges and the dipole moment from the first element of each argument.
+
+    Charges come from the three-field rows (after the first two lines) of ``mulliken_block``; the
+    dipole comes from the first eight-field row (after the first line) of ``dipole_block``.
+
+    Returns:
+        (charges, dipole, dipole_v): a ``cctk.OneIndexedArray``, the total (``0`` if no row
+        matched) and a length-3 numpy vector (zeros if no row matched)
+    """
+    def tokens(line):
+        return [tok for tok in re.split(" +", line) if tok]
+
+    charge_rows = [tokens(line) for line in mulliken_block[0].split("\n")[2:]]
+    charges = [float(row[2]) for row in charge_rows if len(row) == 3]
+
+    dipole, dipole_v = 0, np.zeros(shape=3)
+    for row in map(tokens, dipole_block[0].split("\n")[1:]):
+        if len(row) == 8:
+            dipole_v[:] = [float(row[1]), float(row[3]), float(row[5])]
+            dipole = float(row[7])
+            break
+    return cctk.OneIndexedArray(charges), dipole, dipole_v
+
+def parse_hirshfeld(hirshfeld_block):
+    """
+    Reads Hirshfeld charges and spin densities from the first element of ``hirshfeld_block``.
+
+    Only eight-field rows after the first two lines are used (3rd field = charge, 4th field = spin).
+
+    Returns:
+        two ``cctk.OneIndexedArray`` objects (charges, spins), or ``(None, None)`` for an empty list
+    """
+    if len(hirshfeld_block) == 0:
+        return None, None
+    rows = [list(filter(None, re.split(" +", line))) for line in hirshfeld_block[0].split("\n")[2:]]
+    rows = [row for row in rows if len(row) == 8]
+    charges = [float(row[2]) for row in rows]
+    spins = [float(row[3]) for row in rows]
+    return cctk.OneIndexedArray(charges), cctk.OneIndexedArray(spins)
+
+def parse_modes(freq_block, num_atoms, hpmodes=False):
+    """Builds a list of ``cctk.VibrationalMode`` from the first element of ``freq_block`` (an empty list if there is none).
+    ``hpmodes`` (any truthy value) selects the high-precision table layout; ``num_atoms`` sizes its displacement arrays."""
+    freqs = list()
+    masses = list()
+    force_ks = list()
+    intensities = list()
+    displacements = list()
+    if len(freq_block) == 0:
+        return list()
+
+    chunks = freq_block[0].split("Freq")  # each later chunk starts right after a "Freq" token
+    if hpmodes:
+        chunks = chunks[1:]  # drop the text that precedes the first "Freq"
+
+    for chunk in chunks:
+        lines = chunk.split("\n")
+
+        if hpmodes:
+            num_cols = len(re.split(" +", lines[0])) - 2
+            current_displacements = [np.zeros(shape=(num_atoms, 3)) for x in range(num_cols)]
+
+            if len(freqs):
+                new_freqs = list(filter(None, re.split(" +", lines[0])))[2:]
+
+                if float(new_freqs[-1]) <= float(freqs[-1]):
+                    break # want to skip the non-hpmodes section, so no looping allowed
+                else:
+                    freqs += new_freqs
+            else:
+                freqs += list(filter(None, re.split(" +", lines[0])))[2:]
+
+            masses += list(filter(None, re.split(" +", lines[1])))[3:]
+            force_ks += list(filter(None, re.split(" +", lines[2])))[3:]
+            intensities += list(filter(None, re.split(" +", lines[3])))[3:]
+
+            for line in lines[6:]:
+                fields = re.split(" +", line)
+                fields = list(filter(None, fields))
+
+                if len(fields) < (num_cols + 3):
+                    continue
+
+                if fields[0] == "Harmonic":
+                    break
+
+                for col_idx, val in enumerate(fields[3:]):
+                    current_displacements[col_idx][int(fields[1])-1][int(fields[0])-1] = val  # fields[1] picks the atom row, fields[0] the coordinate column (both 1-based)
+
+            for d in current_displacements:
+                displacements.append(d.view(cctk.OneIndexedArray))
+
+        else:
+            current_displacements = [list() for _ in re.split(" +", lines[0])[2:]]
+
+            freqs += re.split(" +", lines[0])[2:]
+            masses += re.split(" +", lines[1])[4:]
+            force_ks += re.split(" +", lines[2])[4:]
+            intensities += re.split(" +", lines[3].rstrip())[4:]
+
+            for line in lines[5:]:
+                fields = re.split(" +", line)
+                fields = list(filter(None, fields))
+
+                if len(fields) < 4:
+                    break
+
+                current_displacements[0].append([float(x) for x in fields[2:5]])
+
+                if len(current_displacements) > 1:
+                    current_displacements[1].append([float(x) for x in fields[5:8]])
+
+                if len(current_displacements) > 2:
+                    current_displacements[2].append([float(x) for x in fields[8:11]])  # this branch reads at most three columns per chunk
+
+            for d in current_displacements:
+                displacements.append(cctk.OneIndexedArray(d))
+
+    freqs = [float(x) for x in freqs]
+    masses = [float(x) for x in masses]
+    force_ks = [float(x) for x in force_ks]
+    intensities = [float(x) for x in intensities]
+
+    assert len(freqs) == len(masses)
+    assert len(freqs) == len(force_ks)
+    assert len(freqs) == len(displacements)  # NOTE(review): len(intensities) is not checked; zip() below would silently truncate
+
+    modes = list()
+    for f, m, k, i, d in zip(freqs, masses, force_ks, intensities, displacements):
+        k *= 143.9326 # mdyne Å**-1 to kcal/mol Å**-2
+        modes.append(cctk.VibrationalMode(frequency=f, reduced_mass=m, force_constant=k, intensity=i, displacements=d))
+
+    return modes
+
+def read_j_couplings(lines, n_atoms):
+    """
+    Helper method to read the J-coupling matrix out of the scraped spin-spin coupling block.
+    Args:
+        lines (list): matched text blocks; only the first one is read
+        n_atoms (int): how many atoms are in the molecule
+    Returns:
+        ``couplings`` symmetric 2D np.array of couplings (in Hz) with zero-indexed atoms on both axes
+        or None if no couplings were found (``lines`` is empty)
+    """
+    # an empty match list used to hit IndexError on lines[0]; return the documented None instead
+    if len(lines) == 0:
+        return None
+
+    couplings = np.zeros((n_atoms,n_atoms))
+
+    # The lower triangle is printed in blocks of up to 5 columns. A block starting at column
+    # ``start`` has one column-index row plus one row per atom from ``start`` down to the last
+    # atom, and one header line precedes everything.
+    n_lines = 1 + sum(1 + n_atoms - start for start in range(0, n_atoms, 5))
+
+    lines = lines[0].split("\n")
+
+    i = 0
+    read_column_indices = False
+    read_row = False
+    this_column_indices = []
+    while i < n_lines:
+        # get current line
+        line = lines[i]
+
+        # if this is the header, we should be reading the column indices next
+        if "Total nuclear spin-spin coupling J (Hz):" in line:
+            i += 1
+            read_column_indices = True
+            continue
+
+        # this is not the header, so split the fields
+        fields = line.split()
+
+        # read the column indices
+        if read_column_indices:
+            this_column_indices = [ int(j)-1 for j in fields ]
+            i += 1
+            read_column_indices = False
+            read_row = True
+            continue
+        elif read_row:
+            row = int(fields[0])-1
+            for j,value in enumerate(fields[1:]):
+                column = this_column_indices[j]
+                # values use "D" as the exponent marker, which float() does not accept
+                value = float(value.replace("D","E"))
+                couplings[row,column] = value
+                couplings[column,row] = value
+
+            # check if we have read the entire matrix
+            if row == n_atoms - 1 and column == n_atoms - 1:
+                break
+
+            # check if this is the end of the current block
+            if row == n_atoms - 1:
+                read_column_indices = True
+                read_row = False
+
+            i += 1
+            continue
+        else:
+            raise ValueError("impossible")
+
+    return couplings
+
+def parse_mp2_energies(lines):
+    """
+    Converts the 6th space-separated field of every line to a float, reading "D" exponents as "E".
+    """
+    def energy(line):
+        return float([tok for tok in line.split(" ") if tok][5].replace("D", "E"))
+
+    return [energy(line) for line in lines]
+
+def parse_mp3_energies(lines):
+    """
+    Converts the 4th space-separated field of every line to a float, reading "D" exponents as "E".
+    """
+    energies = []
+    for tokens in ([tok for tok in line.split(" ") if tok] for line in lines):
+        energies.append(float(tokens[3].replace("D", "E")))
+    return energies
+
+def parse_mp4_energies(lines):
+    """
+    Returns one float per line, taken from the 4th space-separated field with "D" exponents read as "E".
+    """
+    def fortran_float(text):
+        return float(text.replace("D", "E"))
+
+    return [fortran_float(list(filter(None, line.split(" ")))[3]) for line in lines]
+
+def parse_cc_energies(lines):
+    """
+    Returns one float per line, taken from the 5th space-separated field with "D" exponents read as "E".
+
+    ``parse_ci_energies`` delegates to this function.
+    """
+    fifth_fields = (list(filter(None, line.split(" ")))[4] for line in lines)
+    return [float(field.replace("D", "E")) for field in fifth_fields]
+
+def parse_ci_energies(lines):
+    return parse_cc_energies(lines)  # CI energy lines are read exactly like the CC ones
+
+def read_nmr_shifts(blocks, num_atoms):
+    """
+    Helper method to read NMR shieldings from the scraped "Isotropic" blocks.
+
+    Args:
+        blocks (list of str): text blocks, each expected to hold one isotropic line and the nine tensor components
+        num_atoms (int): number of atoms expected
+
+    Returns:
+        isotropic NMR shieldings (np.ndarray) and shielding tensors (list of 3x3 np.ndarray),
+        or ``(None, None)`` if no isotropic line was found
+
+    Raises:
+        ValueError: if an isotropic value cannot be converted to a float
+        AssertionError: if the number of shieldings is not ``num_atoms`` or a tensor trace disagrees
+    """
+    # assumes that blocks only come from one Link1 section
+    shieldings = []
+    tensors = []
+
+    for block in blocks:
+        for line in block.split("\n"):
+            fields = line.split()
+            # there are 8 on each line but we truncate the first 2 in the block selection process
+            if len(fields) == 6 and fields[0] == "Isotropic" and fields[3] == "Anisotropy":
+                try:
+                    shieldings.append(float(fields[2]))
+                except ValueError as e:
+                    raise ValueError(f"Error parsing NMR shielding output line:\n{line}") from e
+
+        # Build the nine component patterns (XX= ... ZZ=) as raw strings so that "\s" and "\d"
+        # reach the regex engine without tripping Python's invalid-escape warning.
+        tensor = np.zeros(shape=(3,3))
+        for row, first in enumerate("XYZ"):
+            for col, second in enumerate("XYZ"):
+                pattern = rf"{first}{second}=\s+(?P<val>-?\d+\.\d+)"
+                tensor[row][col] = float(re.search(pattern, block).group("val"))
+        tensors.append(tensor)
+
+    if len(shieldings) != 0:
+        assert len(shieldings) == num_atoms, f"Expected {num_atoms} shieldings but found {len(shieldings)}!"
+        # each isotropic value should match a third of the trace of the corresponding tensor
+        for shielding, tensor in zip(shieldings, tensors):
+            assert 0.01 > abs(np.trace(tensor)/3 - shielding)
+        return np.asarray(shieldings), tensors
+    else:
+        #### we can catch this problem later if the file is finished
+        return None, None
+
+def split_link1(filename):
+    """
+    Cuts ``filename`` into ``cctk.LazyLineObject`` sections at each line containing "Entering Link 1".
+
+    Args:
+        filename (str): path to file
+    Returns:
+        list of ``cctk.LazyLineObject``, one per section after the first (which is just a few lines)
+    """
+    sections = []
+    section_start = 0
+    with open(filename, "r") as handle:
+        for idx, line in enumerate(handle):
+            if "Entering Link 1" in line:
+                sections.append(cctk.LazyLineObject(file=filename, start=section_start, end=idx))
+                section_start = idx
+    # NOTE(review): ``idx`` is unbound here if the file has no lines at all
+    sections.append(cctk.LazyLineObject(file=filename, start=section_start, end=idx))
+    return sections[1:]
+
+
diff --git a/build/lib/cctk/parse_orca.py b/build/lib/cctk/parse_orca.py
new file mode 100644
index 0000000..3670df6
--- /dev/null
+++ b/build/lib/cctk/parse_orca.py
@@ -0,0 +1,220 @@
+import numpy as np
+import re
+
+from cctk.helper_functions import get_number
+from cctk import OneIndexedArray, LazyLineObject
+
+"""
+Functions to help with parsing Orca files
+"""
+def read_geometries(lines, num_to_find):
+    """Parse ``num_to_find`` Cartesian-coordinate blocks; returns ``(atomic_numbers, geometries)`` with one geometry per block."""
+    atomic_numbers, geometries = [], []
+
+    geom_blocks = lines.search_for_block(r"CARTESIAN COORDINATES \(ANGSTROEM\)", "CARTESIAN COORDINATES", join="\n", count=num_to_find, max_len=1000)
+    if num_to_find == 1:
+        geom_blocks = [geom_blocks]
+
+    for block in geom_blocks:
+        rows = block.split("\n")
+        numbers = []
+        geometry = []
+
+        for line in rows[2:]:  # the first two rows of each block are not parsed
+            if len(line.strip()) == 0:
+                continue
+
+            pieces = list(filter(None, line.split(" ")))
+
+            if len(pieces) == 4:  # rows with any other number of fields are ignored
+                if re.match("[0-9]", pieces[0]):
+                    numbers.append(int(pieces[0]))
+                else:
+                    numbers.append(int(get_number(pieces[0])))
+                geometry.append([float(pieces[1]), float(pieces[2]), float(pieces[3])])
+
+        atomic_numbers.append(OneIndexedArray(numbers, dtype=np.int8))
+        geometries.append(OneIndexedArray(geometry))
+
+    assert len(atomic_numbers) == len(geometries)
+    for zs in atomic_numbers:  # every block must list the same atoms as the first one
+        assert np.array_equiv(zs, atomic_numbers[0])
+    return atomic_numbers[0], geometries
+
+def read_energies(lines):
+    """Return ``(energies, iters)`` looked up via ``lines.find_parameter`` on the final-energy and SCF-convergence lines."""
+    final_energies = lines.find_parameter("FINAL SINGLE POINT ENERGY", 5, 4)
+    return final_energies, lines.find_parameter("SCF CONVERGED AFTER", 7, 4)
+
+def split_multiple_inputs(filename):
+    """
+    Splits ``filename`` into blocks by searching for "Entering Link 1" (a placeholder marker; see the note in the loop).
+
+    Args:
+        filename (str): path to file
+
+    Returns:
+        list of ``LazyLineObject``, one per input section
+    """
+    output_blocks = []
+    idx = 0  # keeps ``idx`` bound for the final append when the file has no lines
+    start_block = 0
+    with open(filename, "r") as lines:
+        for idx, line in enumerate(lines):
+            if re.search("Entering Link 1", line): # this will never be true for an Orca file -- this is just a stopgap
+                output_blocks.append(LazyLineObject(file=filename, start=start_block, end=idx))
+                start_block = idx
+    output_blocks.append(LazyLineObject(file=filename, start=start_block, end=idx))
+
+    return output_blocks
+
+def read_mulliken_charges(lines):
+    """
+    Reads the Mulliken atomic charges.
+
+    Args:
+        lines: object providing ``search_for_block`` for the file contents
+
+    Returns:
+        ``cctk.OneIndexedArray`` of charges
+    """
+    block = lines.search_for_block("MULLIKEN ATOMIC CHARGES", "Sum of atomic charges", join="\n")
+    parsed = []
+    for row in block.split("\n")[2:]:
+        # split on runs of spaces and drop the empty strings that leaves behind
+        tokens = [tok for tok in re.split(" +", row) if tok]
+
+        if len(tokens) == 4:
+            parsed.append(float(tokens[3]))
+
+    return OneIndexedArray(parsed)
+
+
+def read_loewdin_charges(lines):
+    """
+    Reads the Loewdin atomic charges.
+
+    Args:
+        lines: object providing ``search_for_block`` for the file contents
+
+    Returns:
+        ``cctk.OneIndexedArray`` of charges
+    """
+    block = lines.search_for_block("LOEWDIN ATOMIC CHARGES", "^$", join="\n")
+    parsed = []
+    for row in block.split("\n")[2:]:
+        # split on runs of spaces and drop the empty strings that leaves behind
+        tokens = [tok for tok in re.split(" +", row) if tok]
+
+        if len(tokens) == 4:
+            parsed.append(float(tokens[3]))
+
+    return OneIndexedArray(parsed)
+
+def read_header(lines):
+    """Return the first line that starts with ``!``, or ``None`` if no line does."""
+    bang_lines = (entry for entry in lines if re.match("!", entry))
+    return next(bang_lines, None)
+
+def read_blocks_and_variables(lines):
+    """Split input lines into ``%key value`` variables and ``%key ... end`` blocks; returns ``(variables, blocks)``."""
+    blocks, variables = {}, {}
+    open_key, body = None, []
+    for entry in lines:
+        if open_key is not None and not re.match("end", entry):
+            # still inside a block: accumulate the raw line
+            body.append(entry)
+            continue
+        if open_key is not None:
+            # a line starting with "end" closes the block that is currently open
+            blocks[open_key] = body
+            open_key, body = None, []
+        if not re.match("%", entry):
+            continue
+        tokens = re.split(" +", entry.lstrip("%"))
+        if len(tokens) == 1:
+            # a bare "%name" opens a block
+            open_key = tokens[0]
+        else:
+            variables[tokens[0]] = " ".join(tokens[1:])
+
+    return variables, blocks
+
+def extract_input_file(lines):
+    """Return the lines of the ``INPUT FILE`` block with everything up to the first ``>`` and leading whitespace removed."""
+    input_block = lines.search_for_block("INPUT FILE", r"\*\*\*\*END OF INPUT\*\*\*\*", join="\n")
+    input_lines = []
+    for line in input_block.split("\n")[3:]:  # the first three rows of the block are not input text
+        _, text = line.split(">", 1)  # split once only, so input text that itself contains ">" survives
+        input_lines.append(text.lstrip())
+    return input_lines
+
+def read_freqs(lines):
+    """Return the positive entries (as floats) of the ``VIBRATIONAL FREQUENCIES`` block, or ``[]`` if the block is absent."""
+    freq_block = lines.search_for_block("VIBRATIONAL FREQUENCIES", "NORMAL MODES", join="\n", max_len=1000)
+    if freq_block is None:
+        return []
+    positive = []
+    for row in freq_block.split("\n"):
+        tokens = re.split(" +", row.strip())
+        if len(tokens) == 3 and tokens[2] == "cm**-1" and float(tokens[1]) > 0:  # three-token rows ending in the unit
+            positive.append(float(tokens[1]))
+    return positive
+
+def read_gradients(lines, num_to_find):
+    """Collect RMS/MAX gradient and step values from the "Geometry convergence" blocks; returns four lists, or ``None`` if no block is found."""
+    grad_blocks = lines.search_for_block("Geometry convergence", r"Max\(Bonds", join="\n", count=num_to_find)
+    if grad_blocks is None:
+        return
+    if isinstance(grad_blocks, str):  # a single block comes back bare (cf. read_geometries); wrap it so we loop over blocks, not characters
+        grad_blocks = [grad_blocks]
+
+    rms_grad, max_grad, rms_step, max_step = [], [], [], []
+    for grad_block in grad_blocks:
+        if grad_block is None:
+            continue
+        for line in grad_block.split("\n"):
+            fields = re.split(" +", line.strip())
+            if len(fields) == 5:
+                if fields[0] == "RMS" and fields[1] == "gradient":
+                    rms_grad.append(float(fields[2]))
+                if fields[0] == "MAX" and fields[1] == "gradient":
+                    max_grad.append(float(fields[2]))
+                if fields[0] == "RMS" and fields[1] == "step":
+                    rms_step.append(float(fields[2]))
+                if fields[0] == "MAX" and fields[1] == "step":
+                    max_step.append(float(fields[2]))
+
+    return rms_grad, max_grad, rms_step, max_step
+
+def read_nmr_shifts(lines, num_atoms):
+    """
+    Helper method to search through output file and read NMR shifts.
+
+    Args:
+        lines: object providing ``search_for_block`` for the file contents
+        num_atoms (int): number of atoms expected
+
+    Returns:
+        isotropic shieldings as a ``OneIndexedArray`` view, or ``None`` if none were found (``ValueError`` on an unparsable row)
+    """
+    # assumes that lines only come from one Link1 section
+    shieldings = []
+    block = lines.search_for_block("Nucleus  Element", "^$", join="\n")
+    for line in block.split("\n")[2:]:
+        fields = line.split()
+        if len(fields) == 4:
+            try:
+                shielding = float(fields[2])
+            except ValueError as e:  # only a failed float conversion is expected here
+                raise ValueError(f"Error parsing NMR shielding output line:\n{line}") from e
+            shieldings.append(shielding)
+
+    if len(shieldings) != 0:
+        assert len(shieldings) == num_atoms, f"Expected {num_atoms} shieldings but found {len(shieldings)}!"
+        return np.asarray(shieldings).view(OneIndexedArray)
+    else:
+        #### we can catch this problem later if the file is finished
+        return None
+
+
diff --git a/build/lib/cctk/pdb_file.py b/build/lib/cctk/pdb_file.py
new file mode 100644
index 0000000..34b3848
--- /dev/null
+++ b/build/lib/cctk/pdb_file.py
@@ -0,0 +1,56 @@
+from cctk import File
+from cctk.helper_functions import get_symbol
+
+class PDBFile(File):
+    """
+    Generic class for all ``.pdb`` files.
+    """
+
+    def __init__(self, molecule, title=None):
+        pass  # construction is a no-op here
+
+    @classmethod
+    def read_file(cls, filename):
+        pass  # reading is not implemented here
+
+    @classmethod
+    def write_molecule_to_file(cls, filename, molecule, num=1, append=False):
+        """
+        Write one molecule as a ``MODEL``/``ENDMDL`` section of a ``.pdb`` file.
+
+        Args:
+            filename (str): path to the new file
+            molecule (Molecule): ``Molecule`` object
+            num (int): model number
+            append (Bool): whether to write to file normally or append
+        """
+        records = [f"MODEL {num}\n"]
+
+        for idx, Z in enumerate(molecule.atomic_numbers, start=1):
+            xyz = molecule.get_vector(idx)
+            symb = get_symbol(Z).upper()
+            records.append(f"HETATM {idx:>4}  {symb:<2}    *     0     {xyz[0]:7.3f} {xyz[1]:7.3f} {xyz[2]:7.3f}  1.00  0.00          {symb:>2}\n")
+
+        records.append("ENDMDL\n")
+        text = "".join(records)
+
+        if append:
+            super().append_to_file(filename, text)
+        else:
+            super().write_file(filename, text)
+
+
+    @classmethod
+    def write_ensemble_to_trajectory(cls, filename, ensemble):
+        """
+        Writes every molecule of an ensemble to one trajectory file, one model per molecule.
+
+        Args:
+            filename (str): where to write the file
+            ensemble (Ensemble): ``Ensemble`` object to write
+        """
+        for idx, molecule in enumerate(ensemble.molecules):
+            # the first model is written with ``append=False``;
+            # every later model is appended to the same file
+            is_first = idx == 0
+            cls.write_molecule_to_file(filename, molecule, num=idx+1, append=not is_first)
+
diff --git a/build/lib/cctk/point_charge.py b/build/lib/cctk/point_charge.py
new file mode 100644
index 0000000..e5efeae
--- /dev/null
+++ b/build/lib/cctk/point_charge.py
@@ -0,0 +1,18 @@
+import numpy as np
+
+class PointCharge():
+    """
+    A point charge: a position in space plus a charge value.
+
+    Attributes:
+        coordinates (np.ndarray): 3-element ndarray
+        charge (float): charge
+    """
+
+    def __init__(self, coordinates, charge):
+        coords_ok = isinstance(coordinates, (np.ndarray, list))
+        assert coords_ok, "coordinates must be list or ndarray!"
+        assert len(coordinates) == 3, "coordinates must have len 3!"
+        self.coordinates = np.array(coordinates)
+        assert isinstance(charge, (float, int)), "charge must be numeric"
+        self.charge = float(charge)
diff --git a/build/lib/cctk/quasiclassical.py b/build/lib/cctk/quasiclassical.py
new file mode 100644
index 0000000..1b09eb1
--- /dev/null
+++ b/build/lib/cctk/quasiclassical.py
@@ -0,0 +1,214 @@
+"""
+Functions to assist in sampling thermally excited states through quasiclassical approximations.
+"""
+
+import numpy as np
+import math, copy, random
+
+import cctk
+
+"""
+Constants:
+"""
+
+AMU_A2_FS2_PER_KCAL_MOL = 0.0004184 # unit conversion applied in the mode-velocity formula of apply_vibration
+BOLTZMANN_CONSTANT = 0.001985875 # kcal/mol•K
+
+def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities=False, which="quasiclassical", mode_options=None):
+    """
+    Perturbs molecule by treating each mode as a quantum harmonic oscillator and sampling from the distribution appropriate to the temperature.
+
+    This is probably the only useful function in this file.
+
+    Args:
+        molecule (cctk.Molecule): molecule with vibrational modes
+        temperature (float): temperature
+        return_velocities (bool): whether or not to return velocities
+        which (str): ``classical`` or ``quasiclassical``
+        mode_options (dict):
+            Options for how to initialize specific modes.
+                key (int): 1-indexed number of vibrational mode (from smallest frequency to largest)
+                val (dict):
+                    velocity (str): one of "positive", "negative", "random", "zero"
+                    displacement (bool): whether or not to displace
+
+    Returns:
+        new ``cctk.Molecule`` object (a perturbed deep copy; the input is left untouched)
+        potential energy summed over all modes (kcal/mol)
+        total energy summed over all modes (kcal/mol)
+        text report with one line per mode
+        velocities (cctk.OneIndexedArray) -- only when ``return_velocities`` is True
+    """
+    assert isinstance(molecule, cctk.Molecule), "need a valid molecule"
+    assert len(molecule.vibrational_modes) > 0, "molecule needs to have vibrational modes (try running a ``freq`` job)"
+    assert isinstance(temperature, (int, float)), "temperature must be numeric"
+
+    mol = copy.deepcopy(molecule)
+    total_PE, total = 0, 0
+    velocities = np.zeros_like(molecule.geometry.view(np.ndarray)).view(cctk.OneIndexedArray)
+
+    if mode_options is None:
+        mode_options = dict()
+
+    all_text = ""
+    for idx, mode in enumerate(mol.vibrational_modes):
+        # enumerate is 0-indexed but GaussView, etc 1-index the modes. so we're 1-indexing here too.
+        if idx+1 in mode_options:
+            PE, KE, TE, mode_velocity, text = apply_vibration(mol, mode, temperature=temperature, which=which, **mode_options[idx+1])
+        else:
+            PE, KE, TE, mode_velocity, text = apply_vibration(mol, mode, temperature=temperature, which=which)
+        total_PE += PE
+        total += TE
+        all_text += f"Mode {idx+1}: {text}\n"
+
+        for atom_idx in range(1,molecule.num_atoms()+1):  # separate name so the mode counter ``idx`` is not overwritten
+            velocities[atom_idx] += mode_velocity * mode.displacements[atom_idx]
+
+    if return_velocities:
+        return mol, total_PE, total, all_text, velocities
+    else: # backwards compatibility
+        return mol, total_PE, total, all_text
+
+def apply_vibration(molecule, mode, min_freq=50, temperature=298, verbose=False, which="quasiclassical", displacement=True, velocity="random", **kwargs):
+    """
+    Apply a vibration to molecule ``molecule`` (modified in-place); raises ``ValueError`` for an unknown ``which`` or ``velocity``.
+
+    Args:
+        molecule (cctk.Molecule)
+        mode (cctk.VibrationalMode)
+        min_freq (float): modes with a frequency below this get no displacement
+        temperature (float)
+        verbose (bool): if True, print the text report
+        which (str): ``quasiclassical`` or ``classical``; forced to ``ts`` when the mode frequency is negative
+        displacement (bool): whether or not to displace the mode
+        velocity (str): ``positive``, ``negative``, ``random``, or ``zero``
+
+    Returns:
+        potential energy
+        kinetic energy
+        energy
+        signed mode velocity (scalar)
+        text
+    """
+
+    if mode.frequency < 0:
+        which = "ts"
+
+    if which == "quasiclassical":
+        level = mode.choose_level(temperature)
+        energy = mode.energy(level)
+        shift = mode.random_displacement(level=level, method=which)
+        method = f"qc level {level}"
+    elif which == "classical":
+        energy = random_boltzmann_energy(temperature)
+        shift = mode.random_displacement(energy=energy, method=which)
+        method = "classical"
+    elif which == "ts":
+        energy = random_boltzmann_energy(temperature)
+        shift = 0
+        method = "ts"
+    else:
+        raise ValueError(f"``which`` must be ``classical``, ``quasiclassical``, or ``ts`` - {which} does not match!")
+
+    # the rest is common to all methods
+
+    # transition states and low-frequency modes do not get a starting displacement
+    if not displacement or mode.frequency < min_freq:
+        shift = 0
+
+    max_shift = mode.classical_turning_point(energy=energy)
+    if max_shift == 0.0:
+        rel_shift = 0.0
+        print("Warning: attempted to calculate relative shift when max shift is 0!")
+    else:
+        if shift > max_shift:  # NOTE(review): only an overshoot in the positive direction is clamped
+            print(f"Warning: requested shift of {shift:.4E} exceeds the max_shift of {max_shift:.4E}!")
+            shift = max_shift
+        rel_shift = shift/max_shift
+
+    # apply displacements and compute energy breakdown
+    molecule.geometry += mode.displacements * rel_shift * max_shift
+    potential_energy = 0.5 * mode.force_constant * shift ** 2
+    kinetic_energy = energy - potential_energy
+
+    # mode velocity = sqrt(2 * KE / reduced mass) - want value in Å/fs
+    # https://stackoverflow.com/questions/46820182/randomly-generate-1-or-1-positive-or-negative-integer
+    mode_velocity = math.sqrt(2*kinetic_energy*AMU_A2_FS2_PER_KCAL_MOL/mode.reduced_mass)
+
+    # choose velocity sign
+    if velocity == "random":
+        mode_velocity *= (1 if random.random() < 0.5 else -1)
+    elif velocity == "negative":
+        mode_velocity *= -1
+    elif velocity == "zero":
+        mode_velocity = 0
+    elif velocity != "positive":
+        raise ValueError(f"unknown value {velocity} for keyword ``velocity`` - must be ``positive``, ``negative``, ``random``, or ``zero``")
+
+    text = f"{mode.frequency:.1f} cm-1 ({energy:4.2f} kcal/mol)\t{method}\t Shift {shift:5.2f} of {max_shift:4.2f} Å ({rel_shift:5.0%})"
+    text += f"\tPE = {potential_energy:4.2f} kcal/mol\tKE = {kinetic_energy:4.2f} kcal/mol\tk = {mode.force_constant:.1f} kcal/mol Å^-2"
+    if not displacement:
+        text += "\n\t\tDisplacement manually set to zero!\n"
+    if velocity == "zero":
+        text += "\n\t\tVelocity manually set to zero!\n"
+    if verbose:
+        print(text)
+
+    return potential_energy, kinetic_energy, energy, mode_velocity, text
+
+def get_hermite_polynomial(n):
+    """
+    Builds the degree-n Hermite polynomial (with H_1 = 2x) as a ``np.poly1d`` object.
+
+    Adapted from https://scipython.com/blog/the-harmonic-oscillator-wavefunctions/.
+    """
+    assert isinstance(n, int) and n >= 0, "need positive integer"
+
+    previous = np.poly1d([1.,])
+    if n == 0:
+        return previous
+
+    two_x = np.poly1d([2., 0.])
+    current = two_x
+    # three-term recurrence: H_v = 2x*H_(v-1) - 2(v-1)*H_(v-2)
+    for v in range(2, n+1):
+        previous, current = current, two_x*current - 2*(v-1)*previous
+    return current
+
+def random_boltzmann_energy(temperature, cutoff=10, step1=0.01, step2=0.0001):
+    """
+    Draws a random energy from the Boltzmann distribution for the given temperature; the result is scaled by kT.
+
+    Arguments:
+        temperature (int or float): in K
+        cutoff (int or float): max energy considered, in kT
+        step1: coarse numerical step, in kT
+        step2: fine numerical step, in kT
+    """
+    kT = temperature * BOLTZMANN_CONSTANT
+
+    # uniform draw that the cumulative distribution has to exceed
+    target = np.random.uniform()
+
+    def cumulative_boltzmann(e):
+        return math.erf(math.sqrt(e))
+
+    # invert the cumulative distribution numerically, working in units of kT
+    # (the factor of kT is restored in the return statements)
+    NOT_FOUND = -1
+    trial_energy = NOT_FOUND
+    for coarse in np.arange(0, cutoff, step1):  # coarse pass, up to cutoff kT
+        if cumulative_boltzmann(coarse) > target:
+            trial_energy = coarse - step1
+            break
+
+    if trial_energy == NOT_FOUND:
+        return cutoff * kT
+
+    # second pass over the bracketing coarse interval, using the fine step
+    for fine in np.arange(trial_energy, trial_energy+step1, step2):
+        if cumulative_boltzmann(fine) > target:
+            trial_energy = fine - step2
+            break
+
+    return trial_energy * kT
diff --git a/build/lib/cctk/si_file.py b/build/lib/cctk/si_file.py
new file mode 100644
index 0000000..985d845
--- /dev/null
+++ b/build/lib/cctk/si_file.py
@@ -0,0 +1,89 @@
+import cctk
+from cctk.helper_functions import get_symbol
+
+
+class SIFile(cctk.File):
+    """
+    Writer for Supporting Information files: one titled text section per molecule.
+
+    Attributes:
+        titles (list of str): title of each molecule
+        ensemble (cctk.Ensemble): ``cctk.Ensemble`` of molecules to print
+    """
+
+    def __init__(self, ensemble, titles):
+        if not (ensemble and isinstance(ensemble, cctk.Ensemble)):
+            raise ValueError(f"invalid ensemble {ensemble}!")
+        self.ensemble = ensemble
+
+        # exactly one title per ensemble entry
+        assert len(titles) == len(ensemble)
+        self.titles = titles
+
+    def write_file(self, filename, write_xyz=False, write_dir=None):
+        """
+        Write the SI file, one section per molecule in ensemble order.
+
+        Args:
+            filename (str): path to the new file
+            write_xyz (Bool): whether or not to write ``.xyz`` files for each molecule
+            write_dir (str): directory for the ``.xyz`` files (none are written if this is ``None``)
+        """
+        pairs = zip(self.titles, self.ensemble.items())
+        for position, (title, (molecule, properties)) in enumerate(pairs):
+            assert isinstance(molecule, cctk.Molecule), "molecule is not a valid Molecule object!"
+
+            chunks = [f"{title}\n"]
+            for key, value in generate_info(molecule, properties).items():
+                chunks.append(f"{key}:\t{value}\n")
+
+            chunks.append("Cartesian Coordinates (Å):\n")
+            for index, Z in enumerate(molecule.atomic_numbers, start=1):
+                xyz = molecule.get_vector(index)  # NOTE(review): z is printed with 8 decimals, x and y with 6 -- confirm intended
+                chunks.append(f"{get_symbol(Z):>2}       {xyz[0]:>13.6f} {xyz[1]:>13.6f} {xyz[2]:>13.8f}\n")
+
+            text = "".join(chunks) + "\n"
+
+            if write_xyz and write_dir is not None:
+                cctk.XYZFile.write_molecule_to_file(f"{write_dir}/{title}.xyz", molecule)
+
+            # the first section goes through ``write_file``, later ones through ``append_to_file``
+            if position == 0:
+                super().write_file(filename, text)
+            else:
+                super().append_to_file(filename, text)
+
+
+def generate_info(molecule, properties):
+    """Collect the labelled summary values for one molecule as a dict, in the order they are inserted below."""
+    info = {
+        "Number of Atoms": molecule.num_atoms(),
+        "Stoichiometry": molecule.formula(),
+        "Charge": molecule.charge,
+        "Multiplicity": molecule.multiplicity,
+    }
+
+    # route card and imaginaries are handled by hand for now, since they typically aren't linked to cctk.Molecule.
+    # long-term it would be good to pass an extra info_dict from the calling environment
+    # to avoid these ad hoc carveouts. ccw 3.8.21
+
+    if "route_card" in properties:
+        info["Route Card"] = properties["route_card"]
+
+    # imaginary frequencies are always reported, as the string "None" when absent
+    info["Imaginary Frequencies (cm-1)"] = properties["imaginaries"] if "imaginaries" in properties else "None"
+
+    # remaining entries are copied only when present, in this fixed order
+    optional_fields = (
+        ("energy", "Energy"),
+        ("enthalpy", "Enthalpy"),
+        ("gibbs_free_energy", "Gibbs Free Energy"),
+        ("quasiharmonic_gibbs_free_energy", "Gibbs Free Energy (Quasiharmonic Correction)"),
+        ("dipole_moment", "Dipole Moment (Debye)"),
+    )
+    for key, label in optional_fields:
+        if key in properties:
+            info[label] = properties[key]
+
+    return info
+
diff --git a/build/lib/cctk/topology.py b/build/lib/cctk/topology.py
new file mode 100644
index 0000000..943728a
--- /dev/null
+++ b/build/lib/cctk/topology.py
@@ -0,0 +1,267 @@
+"""
+Functions to handle 3D topology, graph structure, etc of ``Molecule`` objects.
+
+Moved out of ``cctk.Molecule`` because the file was getting unwieldy.
+"""
+
+import numpy as np
+import networkx as nx
+import copy
+
+from cctk.helper_functions import (
+    compute_chirality,
+)
+
+def are_isomorphic(mol1, mol2, return_ordering=False):
+    """
+    Tests whether two molecules have matching bond graphs and atomic numbers (bond orders are not compared).
+
+    Args:
+        mol1 (cctk.Molecule):
+        mol2 (cctk.Molecule):
+        return_ordering (Bool): if True, also returns the atom mapping found by the matcher
+
+    Returns:
+        Boolean denoting if the molecules are isomorphic
+        (optional) mapping list, or ``None`` when there is no match
+    """
+    assert mol1.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+    assert mol2.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+
+    mol1._add_atomic_numbers_to_nodes()
+    mol2._add_atomic_numbers_to_nodes()
+
+    nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)
+    match = nx.algorithms.isomorphism.GraphMatcher(mol1.bonds, mol2.bonds, node_match=nm)
+
+    if not match.is_isomorphic():
+        # no match: keep the same return shape as the success case
+        return (False, None) if return_ordering else False
+
+    if not return_ordering:
+        return True
+
+    # matcher's mapping, listed for nodes 1..N of ``mol1`` in order
+    new_ordering = [match.mapping[x] for x in range(1, mol1.num_atoms() + 1)]
+    return True, new_ordering
+    # (both branches above return; nothing falls through)
+
+def flip_meso_rings(mol, atoms):
+    """
+    Returns a list of permuted molecules with various ``meso`` rings renumbered.
+
+    Args:
+        mol (cctk.Molecule): molecule of interest
+        atoms (list): atom numbers (nodes of ``mol.bonds``) of potential ring atoms to consider
+
+    Returns:
+        list of ``Molecule`` objects; the first entry is always an unpermuted deep copy of ``mol``
+    """
+    #### get all rings in graph
+    returns = [copy.deepcopy(mol)]
+    for center in atoms:
+        cycles = nx.cycle_basis(mol.bonds, root=center)
+        for cycle in cycles:
+            #### get the correct ring
+            if center not in cycle:
+                continue
+
+            #### reorder to put ``center`` first
+            while cycle[0] != center:
+                # why yes, this /is/ a O(n) solution for reordering a list. why do you ask?
+                cycle = cycle[1:] + cycle[0:1]
+            assert cycle[0] == center, "graph reorder failed"
+
+            #### create fragments
+            frag1 = [cycle.pop(1)]
+            frag2 = [cycle.pop(-1)]
+            while len(cycle) > 2:
+                frag1.append(cycle.pop(1))
+                frag2.append(cycle.pop(-1))
+
+            #### cut fragment bonds, depending on if we have even- or odd-numbered ring
+            new_returns = []
+            for mol in returns:  # NOTE: rebinds the ``mol`` parameter; the outer loops see the last entry of ``returns`` afterwards
+                cpy = copy.deepcopy(mol)
+                cpy.remove_bond(frag1[0], cycle[0])
+                cpy.remove_bond(frag2[0], cycle[0])
+                if len(cycle) == 1:
+                    cpy.remove_bond(frag1[-1], frag2[-1])
+                elif len(cycle) == 2:
+                    cpy.remove_bond(frag1[-1], cycle[-1])
+                    cpy.remove_bond(frag2[-1], cycle[-1])
+
+                #### generate graphs
+                graph1 = None
+                graph2 = None
+                fragments = nx.connected_components(cpy.bonds)
+                for fragment in fragments:
+                    if frag1[0] in fragment:
+                        graph1 = cpy.bonds.subgraph(fragment)
+                    if frag2[0] in fragment:
+                        graph2 = cpy.bonds.subgraph(fragment)
+
+                assert isinstance(graph1, nx.Graph), "can't find graph 1"
+                assert isinstance(graph2, nx.Graph), "can't find graph 2"
+
+                #### do our two ring-halves match?? if so, we swap them
+                nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)
+                match = nx.algorithms.isomorphism.GraphMatcher(graph1, graph2, node_match=nm)
+
+                if match.is_isomorphic():
+                    for k,v in match.mapping.items():
+                        cpy = cpy.swap_atom_numbers(k, v)
+
+                    #### redo all the bonds we ablated
+                    if len(cycle) == 1:
+                        cpy.add_bond(frag1[-1], frag2[-1], mol.get_bond_order(frag1[-1], frag2[-1]))
+                    elif len(cycle) == 2:
+                        cpy.add_bond(frag1[-1], cycle[-1], mol.get_bond_order(frag1[-1], cycle[-1]))
+                        cpy.add_bond(frag2[-1], cycle[-1], mol.get_bond_order(frag2[-1], cycle[-1]))
+                    cpy.add_bond(frag1[0], cycle[0], mol.get_bond_order(frag1[0], cycle[0]))
+                    cpy.add_bond(frag2[0], cycle[0], mol.get_bond_order(frag2[0], cycle[0]))
+
+                    new_returns.append(cpy)
+            returns = returns + new_returns
+    return returns
+
+def exchange_identical_substituents(mol, center, self_permutations=None):
+    """
+    Replace homotopic/enantiotopic/diastereotopic substituents about a single atom.
+
+    If a list of permuted ``Molecule`` objects is passed (as ``self_permutations``), that list is extended in place and returned.
+
+    Args:
+        mol (cctk.Molecule): molecule of interest
+        center (integer): number of the atom (a node of ``mol.bonds``) to swap substituents around
+        self_permutations (list of Molecules): optional list of starting ``Molecule`` objects
+
+    Returns:
+        ``Molecule`` object (or list if ``self_permutations`` is not ``None``); raises ``ValueError`` if no pair is found in single-molecule mode
+    """
+    assert mol.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+    mol._add_atomic_numbers_to_nodes()
+    neighbors = list(mol.bonds[center])
+
+    returns = [copy.deepcopy(mol)]
+    if self_permutations is not None:
+        returns = self_permutations  # same list object: the append below is visible to the caller
+
+
+    for i in range(len(neighbors)):
+        for j in range(i+1, len(neighbors)):  # every unordered pair of neighbors of ``center``
+            try:
+                _, frag1 = mol._get_bond_fragments(center, neighbors[i])
+                _, frag2 = mol._get_bond_fragments(center, neighbors[j])
+
+                graph1 = mol.bonds.subgraph(frag1)
+                graph2 = mol.bonds.subgraph(frag2)
+
+                nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)
+                match = nx.algorithms.isomorphism.GraphMatcher(graph1, graph2, node_match=nm)
+                if match.is_isomorphic():
+                    for m in returns:
+                        new_mol = copy.deepcopy(m)
+                        for k,v in match.mapping.items():
+                            new_mol = new_mol.swap_atom_numbers(k, v)
+                        if self_permutations is None:
+                            return new_mol  # single-molecule mode: stop at the first matching pair
+
+                    returns.append(new_mol)  # NOTE(review): sits after the loop, so only the copy made from the last entry of ``returns`` is added -- confirm intended
+
+            except ValueError as e:
+                pass # probably indicates a cycle
+
+    if self_permutations is None:
+        raise ValueError("could not find substituents to switch")  # reached only when no pair matched
+    else:
+        return returns
+
+def get_chirality_report(mol, centers=None):
+    """
+    Evaluates ``compute_chirality`` at each requested center.
+
+    Args:
+        mol (cctk.Molecule): molecule of interest
+        centers (list): atom numbers to check. defaults to all centers with 4+ substituents.
+
+    Returns:
+        dict with centers as keys and ±1 as values
+    """
+    if centers is None:
+        centers = get_stereogenic_centers(mol)
+    assert isinstance(centers, list)
+
+    results = {}
+    for center in centers:
+        neighbors = sorted(mol.bonds[center])
+        assert len(neighbors) >= 4, f"atom {center} has fewer than 4 neighbors ({neighbors})!"
+        # one ``get_vector`` result per neighbor, taken in ascending neighbor order
+        vectors = [mol.get_vector(n, center) for n in neighbors]
+        results[center] = compute_chirality(*vectors)
+    return results
+
+def get_stereogenic_centers(mol):
+    """
+    Lists, in ascending order, every atom with 4 or more bonded neighbors. A bit misleading, since diastereotopic protons/meso protons are also counted.
+    """
+    assert mol.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+    # atoms are numbered from 1, matching the nodes of ``mol.bonds``
+    return [x for x in range(1, mol.num_atoms() + 1) if len(list(mol.bonds[x])) >= 4]
+
+def get_exchangeable_centers(mol):
+    """
+    Lists the atoms making 4 or more bonds that have two isomorphic substituents, i.e. where renumbering could be broken.
+    """
+    exchangeable_centers = []
+    for center in get_stereogenic_centers(mol):
+        # first test: can two substituents on this atom be exchanged?
+        try:
+            exchange_identical_substituents(mol, center)
+            swappable = True
+        except Exception:
+            swappable = False
+
+        # second test, only if the first failed: does a meso-ring flip yield extra molecules?
+        if not swappable:
+            swappable = len(flip_meso_rings(mol, atoms=[center])) > 1
+        if swappable:
+            exchangeable_centers.append(center)
+    return exchangeable_centers
+
+def find_group(mol, group):
+    """
+    Finds instances of ``group`` within ``mol``.
+
+    Args:
+        mol (cctk.Molecule): molecule to search within
+        group (cctk.Group): group to search for (``remove_atom`` is called on it here)
+
+    Returns:
+        list of dictionaries mapping from molecule atom numbers to group atom numbers, one per distinct set of molecule atoms
+    """
+    assert mol.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+    assert group.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
+
+    mol._add_atomic_numbers_to_nodes()
+    group._add_atomic_numbers_to_nodes()
+    group_map = group.map_from_truncated()
+    group.remove_atom(group.attach_to)
+
+    nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)
+    match = nx.algorithms.isomorphism.GraphMatcher(mol.bonds, group.bonds, node_match=nm)
+
+    #### need to only find unique mappings - combinations, not permutations
+    mappings = []
+    seen_atom_sets = set()
+    for sg in match.subgraph_isomorphisms_iter():
+        # mappings that cover the same set of molecule atoms count as one hit;
+        # a hashed lookup replaces rescanning every mapping kept so far
+        atom_set = frozenset(sg.keys())
+        if atom_set not in seen_atom_sets:
+            seen_atom_sets.add(atom_set)
+            mappings.append(sg)
+
+    composition = [{k: group_map[v] for k, v in m.items()} for m in mappings]
+    return composition
+
diff --git a/build/lib/cctk/vibrational_mode.py b/build/lib/cctk/vibrational_mode.py
new file mode 100644
index 0000000..28dfaab
--- /dev/null
+++ b/build/lib/cctk/vibrational_mode.py
@@ -0,0 +1,217 @@
+import math
+import numpy as np
+
+import cctk
+from cctk.quasiclassical import get_hermite_polynomial
+
+# constants
+MAX_QHO_LEVEL = 10000
+MIN_FREQUENCY = 2
+MIN_TEMPERATURE = 10
+MAX_ZPE_RATIO = 0.999999
+
+BOLTZMANN_CONSTANT = 0.001985875 # kcal/mol•K
+
+class VibrationalMode:
+    """
+    Most code adapted from ``jprogdyn``. Displacements will be very low accuracy unless ``freq=hpmodes`` is enabled.
+
+    Values from Gaussian, for now: see https://gaussian.com/vib/.
+
+    Attributes:
+        frequency (float): frequency, in cm-1
+        force_constant (float): force constant, in kcal/mol per Å**2
+        reduced_mass (float): mass, in amus
+        intensity (float): IR intensity
+        displacements (cctk.OneIndexedArray): atom displacements
+        velocities (cctk.OneIndexedArray): atom velocities (not assigned by ``__init__``)
+
+    """
+    def __init__(self, frequency, force_constant, reduced_mass, intensity, displacements):
+        assert isinstance(frequency, float)
+        self.frequency = frequency
+
+        assert isinstance(force_constant, float)
+        self.force_constant = force_constant
+
+        assert isinstance(reduced_mass, float)
+        self.reduced_mass = reduced_mass
+
+        assert isinstance(intensity, float)
+        self.intensity = intensity
+
+        assert isinstance(displacements, cctk.OneIndexedArray)
+        self.displacements = displacements
+
+    def __str__(self):
+        return f"Vibrational mode ({self.frequency:.2f} cm-1, {self.reduced_mass:.2f} amus, {self.force_constant:.2f} kcal/mol Å**-2)"
+
+    def __repr__(self):
+        return f"Vibrational mode ({self.frequency:.2f} cm-1, {self.reduced_mass:.2f} amus, {self.force_constant:.2f} kcal/mol Å**-2)"
+
+    def choose_level(self, temperature=298):
+        """
+        Randomly draws a vibrational level from the harmonic-oscillator Boltzmann distribution at ``temperature``.
+        Returns 0 below ``MIN_TEMPERATURE``; the result never exceeds ``MAX_QHO_LEVEL``.
+        """
+        if temperature < MIN_TEMPERATURE:
+            return 0
+
+        # zpe_ratio is the population of level i+1 relative to level i, by quantum harmonic oscillator
+        zpe_ratio = math.exp( -2 * self.energy() / (BOLTZMANN_CONSTANT * temperature))
+        if zpe_ratio > MAX_ZPE_RATIO:
+            zpe_ratio = MAX_ZPE_RATIO
+
+        # P(n) = P(0) * zpe_ratio**n and the P(n) sum to 1, so P(0) = 1 - zpe_ratio (geometric series).
+        # The running total grows by P(n) per level; growing it by (running total) * zpe_ratio overshoots from level 2 on.
+        level_probability = 1.0 - zpe_ratio
+        cumulative = level_probability
+        draw = np.random.uniform()
+        level = 0
+        while level < MAX_QHO_LEVEL and draw >= cumulative:
+            level_probability *= zpe_ratio
+            cumulative += level_probability
+            level += 1
+        return level
+
+    def energy(self, level=0):
+        """
+        Calculate energy as a function of level. By default returns zero-point energy (level = 0).
+
+        Args:
+            level (int): which vibrational level the mode is in
+
+        Returns:
+            energy (kcal/mol)
+        """
+        assert isinstance(level, int) and level >= 0, "need positive integer for vibrational level"
+
+        freq = self.frequency
+        if freq < MIN_FREQUENCY:
+            freq = MIN_FREQUENCY
+
+        # 0.5 * h * c * frequency (c in cm/s bc wavenumbers)
+        # 0.5 * (6.626 * 10**-34) * (3 * 10**10) * (6.022 * 10**23) / 4184 = 0.0014305
+        zpe = 0.0014305 * freq
+        return zpe * (2 * level + 1)
+
+    def random_displacement(self, energy=None, level=0, method="quasiclassical", max_attempts=1e5):
+        """
+        Draws a random displacement along this mode.
+
+        Args:
+            energy (float): energy of mode (for classical case)
+            method (str): "quasiclassical" or "classical"
+            level (int): which vibrational level
+            max_attempts (int): how many tries you get (only the quasiclassical rejection sampling uses it)
+
+        Returns:
+            shift
+
+        Raises:
+            ValueError: unknown ``method``, or no quasiclassical sample accepted within ``max_attempts``
+        """
+        if method == "quasiclassical":
+            min_val = 0
+            max_val = self.quantum_distribution_max(level)
+            # NOTE(review): the sampling window is the zero-point turning point for every ``level`` -- confirm this is intended
+            max_x = self.classical_turning_point()
+
+            attempts = 0
+            while attempts < max_attempts:
+                x = np.random.uniform(-1 * max_x, max_x)
+                p = self.quantum_distribution_value(x, level)
+
+                y = np.random.uniform(min_val, max_val)
+                if y < p:
+                    return x
+                else:
+                    attempts += 1
+
+            raise ValueError("max_attempts exceeded - can't get a proper initialization for this mode!")
+        elif method == "classical":
+            assert energy is not None, "need energy for classical displacement"
+            max_x = self.classical_turning_point(energy=energy)
+
+            # The classical density 1/(pi * sqrt(max_x**2 - x**2)) is unbounded at the turning points, so it
+            # cannot be rejection-sampled under a finite ceiling: the earlier loop evaluated the density at
+            # max_x itself (division by zero, or an assertion failure when ``energy`` exceeds the zero-point
+            # energy) and never at the trial x. It is sampled exactly instead: for a phase theta that is
+            # uniform on [0, pi], x = max_x * cos(theta) follows precisely this density and always lies
+            # within [-max_x, max_x], so no retry loop is needed.
+            theta = np.random.uniform(0, math.pi)
+            return max_x * np.cos(theta)
+        else:
+            raise ValueError(f"invalid method {method} - only ``quasiclassical`` and ``classical`` implemented currently!")
+
+    def quantum_distribution_value(self, x, level=0):
+        """
+        Calculate psi**2 for quantum harmonic oscillator for a given shift in Å.
+
+        Args:
+            x (float): shift in Å
+            level (int): vibrational level
+        """
+        assert isinstance(level, int) and level >= 0, "need positive integer for vibrational level"
+
+        freq = self.frequency
+        if freq < MIN_FREQUENCY:
+            freq = MIN_FREQUENCY
+
+        n = level # brevity is the soul of wit
+        H = get_hermite_polynomial(n)
+
+        # following https://github.com/ekwan/Jprogdyn/blob/master/src/main/java/edu/harvard/chemistry/ekwan/Jprogdyn/HarmonicOscillatorDistribution.java, line 109
+        # 4 * pi * 3 * 10**8 / (1000 * 10**20 * 6.022 * 10**23 * 6.626 * 10**-34) = 0.000094411, take it or leave it
+        omega_term = 9.4411e-5 * self.reduced_mass * freq
+        val = math.sqrt(omega_term) * math.exp(-1 * omega_term * math.pi * x ** 2 ) * (H(math.sqrt(omega_term * math.pi) * x) ** 2) / (2 ** n * math.factorial(n))
+        return val
+
+    def quantum_distribution_max(self, level=0, num_pts=1e4):
+        """
+        Returns the maximum value of psi**2 for the quantum harmonic oscillator at a given level.
+        """
+        assert isinstance(level, int) and level >= 0, "need positive integer for vibrational level"
+
+        if level == 0:
+            return self.quantum_distribution_value(0)
+
+        max_x = self.classical_turning_point()
+
+        # grid search over ``num_pts`` points in [0, max_x]; there is certainly a better way to do this
+        max_p = 0
+        for x in np.linspace(0, max_x, int(num_pts)):
+            p = self.quantum_distribution_value(x, level)
+            if p > max_p:
+                max_p = p
+
+        return max_p
+
+    def classical_distribution_value(self, x):
+        """
+        Returns the classical density at ``x`` for the zero-point turning point ``max_x``; undefined (division by zero) at ``|x| == max_x``.
+        """
+        max_x = self.classical_turning_point()
+        assert (x <= max_x) and (x >= -1*max_x), "x must be in [-max_x, max_x]"
+        return 1/(math.pi * math.sqrt(max_x**2 - x**2))
+
+    def classical_turning_point(self, energy=None):
+        """
+        Returns the maximum allowed shift based on modelling the mode as a classical harmonic oscillator (e.g. the point where potential energy is maximum).
+
+        Args:
+            energy (float): energy of mode
+                (when ``None``, the zero-point energy ``self.energy()`` is used)
+        """
+        if energy is None:
+            energy = self.energy()
+        else:
+            assert energy > 0, "cannot request turning point for 0 energy!"
+
+        return math.sqrt(2 * energy / self.force_constant)
+
+    def to_string(self):
+        ...
+
+    def from_string(self):
+        ...
diff --git a/build/lib/cctk/xyz_file.py b/build/lib/cctk/xyz_file.py
new file mode 100644
index 0000000..8a1a3aa
--- /dev/null
+++ b/build/lib/cctk/xyz_file.py
@@ -0,0 +1,190 @@
+import re, warnings
+import numpy as np
+
+import cctk
+from cctk.helper_functions import get_symbol, get_number
+
+
+class XYZFile(cctk.File):
+    """
+    Class representing plain ``.xyz`` files.
+
+    Attributes:
+        titles (list of str): the title or titles from the file
+        ensemble (Ensemble): `Ensemble` instance
+        molecule (Molecule): `Molecule` instance representing the first molecule in the file. deprecated, but present for backwards compatibility.
+    """
+
+    def __init__(self, ensemble, titles):
+        assert isinstance(ensemble, cctk.Ensemble), "ensemble must be cctk.Ensemble"
+        self.ensemble = ensemble
+
+        # backwards compatibility
+        self.molecule = ensemble.molecule_list()[0]
+
+        assert isinstance(titles, list), "title must be list"
+        self.titles = titles
+
+    def __getattribute__(self, name):
+        if name == "molecule":
+            warnings.warn("XYZFile attribute ``molecule`` will be removed in upcoming releases of cctk. Use ``ensemble`` attribute instead!", DeprecationWarning, stacklevel=2)
+        return cctk.File.__getattribute__(self, name)
+
+    @classmethod
+    def read_file(cls, filename, charge=0, multiplicity=1, conformational=False):
+        """
+        Factory method to create new XYZFile instances.
+
+        Arguments:
+            filename (str): path to ``.xyz`` file
+            charge (int): charge of resultant molecule
+            multiplicity (int): multiplicity of resultant molecule
+            conformational (bool): whether or not it's a conformational ensemble
+        """
+        assert isinstance(charge, int), "charge must be integer"
+        assert isinstance(multiplicity, int), "multiplicity must be integer"
+        assert multiplicity > 0, "multiplicity must be a positive integer"
+
+        ensemble = cctk.Ensemble()
+        if conformational:
+            ensemble = cctk.ConformationalEnsemble()
+        titles = list()
+
+        lines = super().read_file(filename)
+        current_lines = list()
+        for line in lines:
+            # a bare integer starts a new frame, but only once the current frame holds more than its two header lines
+            if re.search(r"^\s*\d+$", line) and len(current_lines) > 2:
+                t, m = cls.mol_from_lines(current_lines, charge=charge, multiplicity=multiplicity)
+                ensemble.add_molecule(m)
+                titles.append(t)
+                current_lines = list()
+            current_lines.append(line)
+
+        # catch the last molecule
+        if len(current_lines) > 0:
+            t, m = cls.mol_from_lines(current_lines, charge=charge, multiplicity=multiplicity)
+            ensemble.add_molecule(m)
+            titles.append(t)
+
+        return XYZFile(ensemble, titles)
+
+    @classmethod
+    def mol_from_lines(cls, lines, charge=0, multiplicity=1):
+        """
+        Parses one ``.xyz`` frame (atom count, title, then one atom per line) and returns ``(title, molecule)``.
+        """
+        try:
+            num_atoms = int(lines[0])
+        except Exception as e:
+            raise ValueError("can't get the number of atoms from the first line!") from e
+
+        title = lines[1]
+
+        atomic_numbers = np.zeros(shape=num_atoms, dtype=np.int8)
+        geometry = np.zeros(shape=(num_atoms, 3))
+
+        count = 0 # atoms stored so far; blank lines must not advance the array position
+        for index, line in enumerate(lines[2:]):
+            # ignore blank lines
+            if len(line.strip()) == 0:
+                continue
+
+            pieces = line.split() # fields may be separated by any whitespace, tabs included
+            try:
+                if re.match("[0-9]", pieces[0]):
+                    atomic_numbers[count] = int(pieces[0])
+                else:
+                    # mdtraj writes symbol + serial number (e.g. ``Cl12``); keep the whole letter prefix
+                    m = re.match("([A-Za-z]+)[0-9]+", pieces[0])
+                    atomic_numbers[count] = int(get_number(m.group(1) if m else pieces[0]))
+                geometry[count] = [float(pieces[1]), float(pieces[2]), float(pieces[3])]
+            except Exception as e:
+                raise ValueError(f"can't parse line {index+2}: {line}") from e
+            count += 1
+
+        assert num_atoms == count, "wrong number of atoms!"
+        molecule = cctk.Molecule(atomic_numbers, geometry, charge=charge, multiplicity=multiplicity)
+        return title, molecule
+
+    @classmethod
+    def write_molecule_to_file(cls, filename, molecule, title="title", append=False):
+        """
+        Write a single molecule to an ``.xyz`` file: atom count, title, then one line per atom.
+
+        Args:
+            filename (str): path to the new file
+            molecule (Molecule): molecule to write
+            title (str): title of file
+            append (Bool): whether or not to append to file
+        """
+        assert isinstance(molecule, cctk.Molecule), "molecule is not a valid Molecule object!"
+
+        text = f"{molecule.num_atoms()}\n"
+        text += f"{title}\n"
+
+        for index, Z in enumerate(molecule.atomic_numbers, start=1):
+            line = molecule.get_vector(index)
+            text += f"{get_symbol(Z):>2}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n"
+
+        if append:
+            super().append_to_file(filename, text)
+        else:
+            super().write_file(filename, text)
+
+    def write_file(self, filename, idx=-1):
+        """
+        Write one molecule of the ensemble, with its matching title, to ``filename``.
+
+        Args:
+            idx (int): the index of the molecule to write (default ``-1``, the last one)
+        """
+        assert isinstance(idx, int), "idx must be int"
+        self.write_molecule_to_file(filename, self.get_molecule(idx), title=self.titles[idx])
+
+    @classmethod
+    def read_trajectory(cls, filename, **kwargs):
+        """
+        Post refactoring, just an alias for ``XYZFile.read_file()``.
+        """
+        return cls.read_file(filename, **kwargs)
+
+    @classmethod
+    def read_ensemble(cls, filename, **kwargs):
+        """
+        Post refactoring, just an alias for ``XYZFile.read_file()``.
+        """
+        return cls.read_file(filename, **kwargs)
+
+    @classmethod
+    def write_ensemble_to_file(cls, filename, ensemble, title=None):
+        """
+        Write a ``cctk.Ensemble`` to a single ``.xyz`` file. Can be viewed in MOLDEN.
+
+        ``title`` may be one string (reused for every molecule) or a list with one title per molecule.
+        """
+        assert isinstance(ensemble, cctk.Ensemble), f"ensemble {ensemble} is not a cctk.Ensemble"
+
+        if title is None:
+            title = "title"
+        if isinstance(title, str):
+            title = [title for _ in range(len(ensemble))]
+        assert len(title) == len(ensemble)
+
+        # the first molecule is written with ``append=False``, every later one with ``append=True``
+        for idx, (molecule, mol_title) in enumerate(zip(ensemble._items, title)):
+            cls.write_molecule_to_file(filename, molecule, title=mol_title, append=(idx > 0))
+
+    def get_molecule(self, num=None):
+        """
+        Returns a given molecule.
+
+        If ``num`` is specified, returns ``self.ensemble.molecule_list()[num]``; otherwise returns the last molecule.
+        """
+        # some methods pass num=None, which overrides setting the default above
+        if num is None:
+            num = -1
+        assert isinstance(num, int), "num must be int"
+        return self.ensemble.molecule_list()[num]
+
+
diff --git a/cctk/molecule.py b/cctk/molecule.py
index 2ccd3d3..50f8597 100644
--- a/cctk/molecule.py
+++ b/cctk/molecule.py
@@ -9,6 +9,7 @@
 from cctk.helper_functions import (
     get_symbol,
     get_number,
+    get_avg_mass,
     compute_rotation_matrix,
     compute_distance_between,
     compute_angle_between,
@@ -1830,3 +1831,45 @@ def coulomb_analysis(self, atoms1, atoms2, charges):
                 energy += Q[i][j] / R[i][j]
 
         return energy * 627.509 # convert to kcal/mol
+
+    def center_of_mass(self):
+        """
+        Returns the center of mass: atomic positions weighted by ``get_avg_mass`` of each atomic number.
+        """
+        weights = cctk.OneIndexedArray([get_avg_mass(number) for number in self.atomic_numbers]).reshape(-1,1)
+        return np.sum(weights * self.geometry, axis=0) / np.sum(weights)
+
+    def principal_axes_of_rotation(self):
+        """
+        Diagonalize the mass moment-of-inertia tensor, taken about the center of mass.
+
+        See Jprogdyn, RotationalBoltzmann, lines 48–115.
+
+        Returns:
+            moments of inertia (3-element np.array, in ascending order) - some may be zero
+            axes of rotation (3 x 3 np.array) - column ``[:, i]`` belongs to moment ``i``
+        """
+        # recenter a copy on the center of mass; ``self.geometry`` itself is left untouched
+        com = self.center_of_mass()
+        centered = copy.deepcopy(self.geometry.view(np.ndarray))
+        centered += -1 * com
+
+        masses = np.array([get_avg_mass(number) for number in self.atomic_numbers]).reshape(-1,1)
+        np.testing.assert_allclose(np.sum(masses * centered, axis=0) / np.sum(masses), 0, atol=0.00001)
+
+        # accumulate the six independent elements of the symmetric inertia tensor
+        Ixx, Ixy, Ixz, Iyy, Iyz, Izz = 0, 0, 0, 0, 0, 0
+        for mass, (x, y, z) in zip(masses, centered):
+            Ixx += mass * (z*z + y*y)
+            Iyy += mass * (x*x + z*z)
+            Izz += mass * (x*x + y*y)
+            Ixy -= mass * x * y
+            Ixz -= mass * x * z
+            Iyz -= mass * y * z
+
+        inertia_tensor = np.array([[Ixx, Ixy, Ixz], [Ixy, Iyy, Iyz], [Ixz, Iyz, Izz]]).reshape(3,3)
+
+        # eigenvalues are the principal moments, eigenvectors (columns) the principal axes
+        return np.linalg.eigh(inertia_tensor)
+
+
diff --git a/cctk/quasiclassical.py b/cctk/quasiclassical.py
index 1b09eb1..2bcbc27 100644
--- a/cctk/quasiclassical.py
+++ b/cctk/quasiclassical.py
@@ -12,9 +12,10 @@
 """
 
 AMU_A2_FS2_PER_KCAL_MOL = 0.0004184
-BOLTZMANN_CONSTANT = 0.001985875 # kcal/mol•Kn
+BOLTZMANN_CONSTANT = 0.001985875 # kcal/mol•K
+TEMP_TO_eV = 8.61733238e-5 # eV/K
 
-def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities=False, which="quasiclassical", mode_options=None):
+def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities=False, which="quasiclassical", mode_options=None, do_rotation=True):
     """
     Perturbs molecule by treating each mode as a quantum harmonic oscillator and sampling from the distribution appropriate to the temperature.
 
@@ -31,6 +32,7 @@ def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities
                 val (dict):
                     velocity (str): one of "positive", "negative", "random", "zero"
                     displacement (bool): whether or not to displace
+        do_rotation (bool): whether or not to apply classical rotational initialization.
 
     Returns:
         new ``cctk.Molecule`` object
@@ -64,6 +66,37 @@ def get_quasiclassical_perturbation(molecule, temperature=298, return_velocities
         for idx in range(1,molecule.num_atoms()+1):
             velocities[idx] += mode_velocity * mode.displacements[idx]
 
+    if do_rotation:
+        moments, axes_of_rotation = mol.principal_axes_of_rotation()
+        omegas = np.zeros(3) # angular velocity about each principal axis
+        rotational_energy = 0
+
+        # get energy for each principal axis, and convert it to angular frequency
+        # we randomize the sign here...
+        # energy in kcal/mol, unlike Jprogdyn
+        for axis in range(3):
+            energy_axis = random_boltzmann_energy(temperature)
+            rotational_energy += energy_axis
+            # E = I * omega**2 / 2, so omega = sqrt(2 * E / I) with I the moment of THIS axis.
+            # Multiplying E by AMU_A2_FS2_PER_KCAL_MOL puts it in amu * Å**2 / fs**2, commensurate with I
+            # (assuming moments are in amu * Å**2). Axes whose moment is not positive are left without rotation.
+            if moments[axis] > 0:
+                sign = 1 if random.random() < 0.5 else -1
+                omegas[axis] = sign * np.sqrt(2 * energy_axis * AMU_A2_FS2_PER_KCAL_MOL / moments[axis])
+
+        # add energy to total energy counter
+        total += rotational_energy
+
+        # total rotational velocity is linear combination along principal axes; np.linalg.eigh returns
+        # the axes as columns, so this sums omegas[i] * axes_of_rotation[:, i]
+        omega = axes_of_rotation @ omegas
+
+        # now turn this into Cartesian velocity for each atom
+        shifted_positions = copy.deepcopy(mol.geometry)
+        shifted_positions -= mol.center_of_mass()
+        for idx in range(1, mol.num_atoms()+1):
+            velocities[idx] += np.cross(omega, shifted_positions[idx])
+
     if return_velocities:
         return mol, total_PE, total, all_text, velocities
     else: # backwards compatibility
diff --git a/setup.py b/setup.py
index 596831c..bbe3938 100644
--- a/setup.py
+++ b/setup.py
@@ -11,16 +11,17 @@
     packages=["cctk", "cctk.data", "cctk.groups"],
 #    include_package_data=True,
     package_data={"cctk.data": ["*"], "cctk.groups": ["*"],},
-    version="v0.2.13",
+    version="v0.2.14",
     license="Apache 2.O",
     description="computational chemistry toolkit",
     author="Corin Wagen and Eugene Kwan",
     author_email="corin.wagen@gmail.com",
     url="https://github.com/ekwan/cctk",
-    download_url="https://github.com/ekwan/cctk/archive/v0.2.13.tar.gz",
+    download_url="https://github.com/ekwan/cctk/archive/v0.2.14.tar.gz",
     install_requires=["numpy", "networkx", "importlib_resources", "scipy", "pyahocorasick", "basis_set_exchange", "pyyaml"],
     long_description=long_description,
     long_description_content_type='text/markdown',
+    python_requires='>=3.6',
     classifiers=[
         "Development Status :: 4 - Beta",
         "License :: OSI Approved :: Apache Software License",
diff --git a/test/static/h2.xyz b/test/static/h2.xyz
new file mode 100644
index 0000000..1d25adb
--- /dev/null
+++ b/test/static/h2.xyz
@@ -0,0 +1,4 @@
+2
+HeH
+H    0    0    0
+H    0    0    0.77260
diff --git a/test/test_freqs.py b/test/test_freqs.py
index 6343553..0cb43ad 100644
--- a/test/test_freqs.py
+++ b/test/test_freqs.py
@@ -72,7 +72,7 @@ def test_perturb_water(self):
             3: {"velocity": "zero"},
         }
 
-        mol3, e, _, _, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, mode_options=mo)
+        mol3, e, _, _, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, mode_options=mo, do_rotation=False)
         self.assertTrue(isinstance(mol3, cctk.Molecule))
         self.assertFalse(np.any(v)) # all should be zero, AKA False
 
@@ -81,7 +81,7 @@ def test_perturb_water(self):
             2: {"velocity": "positive", "displacement": False},
             3: {"velocity": "positive", "displacement": False},
         }
-        mol4, e, te, text, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, mode_options=mo)
+        mol4, e, te, text, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, mode_options=mo, do_rotation=False)
         self.assertTrue(te - 13.28839457 < 0.00001)
 
         mol5, e, te, text, v = qc.get_quasiclassical_perturbation(mol, return_velocities=True, which="classical")
diff --git a/test/test_molecule.py b/test/test_molecule.py
index b3509fb..55ad810 100644
--- a/test/test_molecule.py
+++ b/test/test_molecule.py
@@ -228,5 +228,17 @@ def test_coulomb_analysis(self):
 
 #        print(mol.coulomb_analysis(atoms1, atoms2, charges))
 
+    def test_rotation(self):
+        mol = self.load_molecule()
+        m1, a1 = mol.principal_axes_of_rotation()
+        # reference moments: [ 672.04715793, 2908.52501403, 3481.42757748]; two-sided checks, so too-small values fail too
+        self.assertAlmostEqual(m1[0], 672.05, delta=0.1)
+        self.assertAlmostEqual(m1[1], 2908.53, delta=0.1)
+        self.assertAlmostEqual(m1[2], 3481.43, delta=0.1)
+
+        h2 = cctk.XYZFile.read_file("test/static/h2.xyz").get_molecule()
+        m2, a2, = h2.principal_axes_of_rotation()
+        self.assertAlmostEqual(m2[0], 0.0, places=8) # first moment ought to be zero
+
 if __name__ == '__main__':
     unittest.main()