Skip to content

Commit

Permalink
Added fast conformational searches + minor cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
RaphaelRobidas committed Aug 12, 2023
1 parent 30e8e94 commit 6789e00
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 60 deletions.
2 changes: 2 additions & 0 deletions cloud_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ psycopg2
pysisyphus
requests
scipy
sklearn
spyrmsd
stripe
wheel
xkcdpass
55 changes: 7 additions & 48 deletions frontend/management/commands/init_static_obj.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,131 +83,90 @@ def handle(self, *args, **options):
"opt",
"Geometry",
creates_ensemble=True,
avail_xtb=True,
avail_Gaussian=True,
avail_ORCA=True,
avail_NWChem=True,
)

self.add_step(
"Conformational Search",
"conf_search",
"Conformer ensemble",
creates_ensemble=True,
avail_xtb=True,
avail_Gaussian=False,
avail_ORCA=False,
avail_NWChem=False,
)

self.add_step(
"Constrained Optimisation",
"constr_opt",
"Geometry with constraint",
creates_ensemble=True,
avail_xtb=True,
avail_Gaussian=True,
avail_ORCA=True,
# avail_NWChem=False, # For now
)

self.add_step(
"Frequency Calculation",
"freq",
"Vibrational modes, IR spectrum and thermochemistry",
creates_ensemble=False,
avail_xtb=True,
avail_Gaussian=True,
avail_ORCA=True,
avail_NWChem=True,
)

self.add_step(
"TS Optimisation",
"optts",
"Transition state geometry",
creates_ensemble=True,
avail_xtb=True,
avail_Gaussian=True,
avail_ORCA=True,
avail_NWChem=True,
)

self.add_step(
"UV-Vis Calculation",
"uvvis",
"UV-Vis spectrum",
creates_ensemble=False,
avail_xtb=True,
avail_Gaussian=True,
avail_ORCA=False,
avail_NWChem=False,
)

self.add_step(
"NMR Prediction",
"nmr",
"NMR spectrum",
creates_ensemble=False,
avail_xtb=False,
avail_Gaussian=True,
avail_ORCA=True,
avail_NWChem=False, # For now
)

self.add_step(
"Single-Point Energy",
"sp",
"Electronic energy",
creates_ensemble=False,
avail_xtb=True,
avail_Gaussian=True,
avail_ORCA=True,
avail_NWChem=True,
)

self.add_step(
"MO Calculation",
"mo",
"Molecular Orbitals",
creates_ensemble=False,
avail_xtb=False,
avail_Gaussian=False,
avail_ORCA=True,
avail_NWChem=True,
)

self.add_step(
"Minimum Energy Path",
"mep",
"Minimum Energy Path",
creates_ensemble=True,
avail_xtb=True,
avail_Gaussian=False,
avail_ORCA=True,
avail_NWChem=False,
)

self.add_step(
"Constrained Conformational Search",
"constr_conf_search",
"Conformer ensemble with constraint",
creates_ensemble=True,
avail_xtb=True,
avail_Gaussian=False,
avail_ORCA=False,
avail_NWChem=False,
)

self.add_step(
"ESP Calculation",
"esp",
"Electrostatic potential map",
creates_ensemble=False,
avail_xtb=False,
avail_Gaussian=False,
avail_ORCA=False,
avail_NWChem=True,
)

self.add_step(
"Fast Conformational Search",
"fast_conf_search",
"Preliminary conformer ensemble",
creates_ensemble=True,
)

title = "NHC-Catalysed Condensation"
Expand Down
158 changes: 154 additions & 4 deletions frontend/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@
from shutil import copyfile, rmtree
import time

from rdkit import Chem
from rdkit.Chem import AllChem
import spyrmsd
from spyrmsd.optional.rdkit import to_molecule
from spyrmsd.rmsd import rmsdwrapper
from sklearn.cluster import DBSCAN

if not settings.IS_CLOUD:
from calcus.celery import app
from celery.signals import task_prerun, task_postrun
Expand Down Expand Up @@ -337,7 +344,14 @@ def testing_delay_cloud(calc_id):
return ErrorCodes.SUCCESS


def system(command, log_file="", force_local=False, software="xtb", calc_id=-1):
def system(
command,
log_file="",
force_local=False,
software="xtb",
calc_id=-1,
time_cumul=False,
):
if REMOTE and not force_local:
assert calc_id != -1

Expand Down Expand Up @@ -463,7 +477,8 @@ def system(command, log_file="", force_local=False, software="xtb", calc_id=-1):
if calc_id != -1:
calc = Calculation.objects.get(pk=calc_id)
calc.status = 1
calc.date_started = timezone.now()
if calc.date_started is None or not time_cumul:
calc.date_started = timezone.now()
calc.save()
res = AbortableAsyncResult(calc.task_id)

Expand Down Expand Up @@ -1525,6 +1540,135 @@ def crest(in_file, calc):
return ErrorCodes.SUCCESS


def fast_conf(in_file, calc):
local_folder = os.path.join(CALCUS_SCR_HOME, str(calc.id))
folder = f"scratch/calcus/{calc.id}" # Remote

calc_start = timezone.now()

subprocess.call(shlex.split("obabel in.xyz -O in.mol"), cwd=local_folder)

mol = Chem.MolFromMolFile(os.path.join(local_folder, "in.mol"))

mol.UpdatePropertyCache()
mol = Chem.AddHs(mol)

confs = list(
AllChem.EmbedMultipleConfs(
mol, numConfs=50, maxAttempts=1000, pruneRmsThresh=0.05
)
)
mols = []
props = []

for num in confs:
cpath = os.path.join(local_folder, f"conf{num+1}")
os.makedirs(cpath, exist_ok=True)

Chem.rdmolfiles.MolToXYZFile(mol, os.path.join(cpath, "in.xyz"), confId=num)

os.chdir(cpath)

# We want to add up all the time required for the calculations.
# We will overwrite this with a more accurate time afterwards, but this makes the time preview
# a bit less strange.
ret = system(
calc.command,
os.path.join(cpath, "calc.out"),
software="xtb",
calc_id=calc.id,
time_cumul=True,
)

# duplicate code
if ret != ErrorCodes.SUCCESS:
logger.error(f"Failed to optimize conformer {num} of calc {calc.id}")
continue

with open(f"{cpath}/xtbopt.xyz") as f:
xyz = clean_xyz("".join(f.readlines()))

with open(f"{cpath}/calc.out") as f:
lines = f.readlines()
ind = len(lines) - 1

try:
while lines[ind].find("TOTAL ENERGY") == -1:
ind -= 1
E = float(lines[ind].split()[3])
except IndexError:
logger.error(
f"Could not parse the output of calculation {calc.id}: invalid format"
)
continue

rdmol = Chem.MolFromMolFile(os.path.join(cpath, "xtbtopo.mol"))

smol = to_molecule(rdmol)

mols.append(smol)
props.append((E, xyz))

def rmsd_cluster(mols, props, threshold=0.8):
# Could be too slow if there are many structures
dist = np.zeros((len(mols), len(mols)))
for i, (mol1, prop1) in enumerate(zip(mols, props)):
for j, (mol2, prop2) in enumerate(zip(mols, props)):
if j <= i:
continue
try:
d = rmsdwrapper(
mol1, mol2, symmetry=True, center=True, minimize=True
)[0]
except spyrmsd.exceptions.NonIsomorphicGraphs:
d = 10
logger.error(f"Non-isomorphic graphs: {i} and {j} (calc {calc.id})")

dist[i, j] = d
dist[j, i] = d

clustering = DBSCAN(eps=threshold, min_samples=1, metric="precomputed").fit(
dist
)

unique_mol = []
unique_prop = []
known = [] # Known cluster labels
for ind, pos in enumerate(clustering.labels_):
if pos in known:
continue
known.append(pos)
unique_mol.append(mols[ind])
unique_prop.append(props[ind])

return unique_mol, unique_prop

umols, uprops = rmsd_cluster(mols, props, threshold=0.5)
for ind, (mol, (E, xyz)) in enumerate(
sorted(zip(umols, uprops), key=lambda i: i[1])
):
s = Structure.objects.get_or_create(
parent_ensemble=calc.result_ensemble,
number=ind + 1,
)[0]
s.degeneracy = 1
s.xyz_structure = xyz
prop = get_or_create(calc.parameters, s)
prop.energy = E
prop.geom = True
s.save()
prop.save()

calc_end = timezone.now()

# Set the true start and end dates, including the conformational sampling and parsing
calc.date_started = calc_start
calc.date_finished = calc_end
calc.save()

return ErrorCodes.SUCCESS


def clean_struct_line(line):
a, x, y, z = line.split()
return f"{LOWERCASE_ATOMIC_SYMBOLS[a.lower()]} {x} {y} {z}\n"
Expand Down Expand Up @@ -2747,13 +2891,18 @@ def calc_to_ccinput(calc):
_nproc = calc.order.resource.pal
_mem = calc.order.resource.memory

_step_name = calc.step.name

if calc.parameters.software.lower() == "xtb" and (
calc.step.name == "Fast Conformational Search"
):
_step_name = "OPT"

# patch
if calc.parameters.software.lower() == "nwchem" and (
calc.step.name == "MO Calculation" or calc.step.name == "ESP Calculation"
):
_step_name = "SP"
else:
_step_name = calc.step.name

params = {
"software": calc.parameters.software,
Expand Down Expand Up @@ -4108,6 +4257,7 @@ def get_Gaussian_xyz(text):
"Single-Point Energy": xtb_sp,
"Minimum Energy Path": mep,
"Constrained Conformational Search": crest,
"Fast Conformational Search": fast_conf,
},
"ORCA": {
"NMR Prediction": orca_nmr,
Expand Down
3 changes: 0 additions & 3 deletions frontend/templates/frontend/batch_calculations.html
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,6 @@
});
});
</script>




<button class="button is-primary">Submit</button>
</form>
Expand Down
Loading

0 comments on commit 6789e00

Please sign in to comment.