From 6789e004562ef3f2127a76d38d573e36a8626385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Robidas?= Date: Fri, 11 Aug 2023 20:02:03 -0400 Subject: [PATCH] Added fast conformational searches + minor cleanup --- cloud_requirements.txt | 2 + .../management/commands/init_static_obj.py | 55 +----- frontend/tasks.py | 158 +++++++++++++++++- .../frontend/batch_calculations.html | 3 - .../templates/frontend/form/availability.js | 16 +- .../templates/frontend/form/type_body.html | 1 + requirements.txt | 3 + 7 files changed, 178 insertions(+), 60 deletions(-) diff --git a/cloud_requirements.txt b/cloud_requirements.txt index 5817c34b..b80346b1 100644 --- a/cloud_requirements.txt +++ b/cloud_requirements.txt @@ -25,6 +25,8 @@ psycopg2 pysisyphus requests scipy +sklearn +spyrmsd stripe wheel xkcdpass diff --git a/frontend/management/commands/init_static_obj.py b/frontend/management/commands/init_static_obj.py index baceef58..b4ef483a 100755 --- a/frontend/management/commands/init_static_obj.py +++ b/frontend/management/commands/init_static_obj.py @@ -83,10 +83,6 @@ def handle(self, *args, **options): "opt", "Geometry", creates_ensemble=True, - avail_xtb=True, - avail_Gaussian=True, - avail_ORCA=True, - avail_NWChem=True, ) self.add_step( @@ -94,10 +90,6 @@ def handle(self, *args, **options): "conf_search", "Conformer ensemble", creates_ensemble=True, - avail_xtb=True, - avail_Gaussian=False, - avail_ORCA=False, - avail_NWChem=False, ) self.add_step( @@ -105,10 +97,6 @@ def handle(self, *args, **options): "constr_opt", "Geometry with constraint", creates_ensemble=True, - avail_xtb=True, - avail_Gaussian=True, - avail_ORCA=True, - # avail_NWChem=False, # For now ) self.add_step( @@ -116,10 +104,6 @@ def handle(self, *args, **options): "freq", "Vibrational modes, IR spectrum and thermochemistry", creates_ensemble=False, - avail_xtb=True, - avail_Gaussian=True, - avail_ORCA=True, - avail_NWChem=True, ) self.add_step( @@ -127,10 +111,6 @@ def handle(self, *args, **options): "optts", "Transition state geometry", creates_ensemble=True, - avail_xtb=True, - avail_Gaussian=True, - avail_ORCA=True, - avail_NWChem=True, ) self.add_step( @@ -138,10 +118,6 @@ def handle(self, *args, **options): "uvvis", "UV-Vis spectrum", creates_ensemble=False, - avail_xtb=True, - avail_Gaussian=True, - avail_ORCA=False, - avail_NWChem=False, ) self.add_step( @@ -149,10 +125,6 @@ def handle(self, *args, **options): "nmr", "NMR spectrum", creates_ensemble=False, - avail_xtb=False, - avail_Gaussian=True, - avail_ORCA=True, - avail_NWChem=False, # For now ) self.add_step( @@ -160,10 +132,6 @@ def handle(self, *args, **options): "sp", "Electronic energy", creates_ensemble=False, - avail_xtb=True, - avail_Gaussian=True, - avail_ORCA=True, - avail_NWChem=True, ) self.add_step( @@ -171,10 +139,6 @@ def handle(self, *args, **options): "mo", "Molecular Orbitals", creates_ensemble=False, - avail_xtb=False, - avail_Gaussian=False, - avail_ORCA=True, - avail_NWChem=True, ) self.add_step( @@ -182,10 +146,6 @@ def handle(self, *args, **options): "mep", "Minimum Energy Path", creates_ensemble=True, - avail_xtb=True, - avail_Gaussian=False, - avail_ORCA=True, - avail_NWChem=False, ) self.add_step( @@ -193,10 +153,6 @@ def handle(self, *args, **options): "constr_conf_search", "Conformer ensemble with constraint", creates_ensemble=True, - avail_xtb=True, - avail_Gaussian=False, - avail_ORCA=False, - avail_NWChem=False, ) self.add_step( @@ -204,10 +160,13 @@ def handle(self, *args, **options): "esp", "Electrostatic potential map", creates_ensemble=False, - avail_xtb=False, - avail_Gaussian=False, - avail_ORCA=False, - avail_NWChem=True, + ) + + self.add_step( + "Fast Conformational Search", + "fast_conf_search", + "Preliminary conformer ensemble", + creates_ensemble=True, ) title = "NHC-Catalysed Condensation" diff --git a/frontend/tasks.py b/frontend/tasks.py index 983f7615..99fcf374 100644 --- a/frontend/tasks.py +++ b/frontend/tasks.py @@ -45,6 +45,13 @@ from shutil import copyfile, rmtree import time +from rdkit import Chem +from rdkit.Chem import AllChem +import spyrmsd +from spyrmsd.optional.rdkit import to_molecule +from spyrmsd.rmsd import rmsdwrapper +from sklearn.cluster import DBSCAN + if not settings.IS_CLOUD: from calcus.celery import app from celery.signals import task_prerun, task_postrun @@ -337,7 +344,14 @@ def testing_delay_cloud(calc_id): return ErrorCodes.SUCCESS -def system(command, log_file="", force_local=False, software="xtb", calc_id=-1): +def system( + command, + log_file="", + force_local=False, + software="xtb", + calc_id=-1, + time_cumul=False, +): if REMOTE and not force_local: assert calc_id != -1 @@ -463,7 +477,8 @@ def system(command, log_file="", force_local=False, software="xtb", calc_id=-1): if calc_id != -1: calc = Calculation.objects.get(pk=calc_id) calc.status = 1 - calc.date_started = timezone.now() + if calc.date_started is None or not time_cumul: + calc.date_started = timezone.now() calc.save() res = AbortableAsyncResult(calc.task_id) @@ -1525,6 +1540,135 @@ def crest(in_file, calc): return ErrorCodes.SUCCESS +def fast_conf(in_file, calc): + local_folder = os.path.join(CALCUS_SCR_HOME, str(calc.id)) + folder = f"scratch/calcus/{calc.id}" # Remote + + calc_start = timezone.now() + + subprocess.call(shlex.split("obabel in.xyz -O in.mol"), cwd=local_folder) + + mol = Chem.MolFromMolFile(os.path.join(local_folder, "in.mol")) + + mol.UpdatePropertyCache() + mol = Chem.AddHs(mol) + + confs = list( + AllChem.EmbedMultipleConfs( + mol, numConfs=50, maxAttempts=1000, pruneRmsThresh=0.05 + ) + ) + mols = [] + props = [] + + for num in confs: + cpath = os.path.join(local_folder, f"conf{num+1}") + os.makedirs(cpath, exist_ok=True) + + Chem.rdmolfiles.MolToXYZFile(mol, os.path.join(cpath, "in.xyz"), confId=num) + + os.chdir(cpath) + + # We want to add up all the time required for the calculations. + # We will overwrite this with a more accurate time afterwards, but this makes the time preview + # a bit less strange. + ret = system( + calc.command, + os.path.join(cpath, "calc.out"), + software="xtb", + calc_id=calc.id, + time_cumul=True, + ) + + # duplicate code + if ret != ErrorCodes.SUCCESS: + logger.error(f"Failed to optimize conformer {num} of calc {calc.id}") + continue + + with open(f"{cpath}/xtbopt.xyz") as f: + xyz = clean_xyz("".join(f.readlines())) + + with open(f"{cpath}/calc.out") as f: + lines = f.readlines() + ind = len(lines) - 1 + + try: + while lines[ind].find("TOTAL ENERGY") == -1: + ind -= 1 + E = float(lines[ind].split()[3]) + except IndexError: + logger.error( + f"Could not parse the output of calculation {calc.id}: invalid format" + ) + continue + + rdmol = Chem.MolFromMolFile(os.path.join(cpath, "xtbtopo.mol")) + + smol = to_molecule(rdmol) + + mols.append(smol) + props.append((E, xyz)) + + def rmsd_cluster(mols, props, threshold=0.8): + # Could be too slow if there are many structures + dist = np.zeros((len(mols), len(mols))) + for i, (mol1, prop1) in enumerate(zip(mols, props)): + for j, (mol2, prop2) in enumerate(zip(mols, props)): + if j <= i: + continue + try: + d = rmsdwrapper( + mol1, mol2, symmetry=True, center=True, minimize=True + )[0] + except spyrmsd.exceptions.NonIsomorphicGraphs: + d = 10 + logger.error(f"Non-isomorphic graphs: {i} and {j} (calc {calc.id})") + + dist[i, j] = d + dist[j, i] = d + + clustering = DBSCAN(eps=threshold, min_samples=1, metric="precomputed").fit( + dist + ) + + unique_mol = [] + unique_prop = [] + known = [] # Known cluster labels + for ind, pos in enumerate(clustering.labels_): + if pos in known: + continue + known.append(pos) + unique_mol.append(mols[ind]) + unique_prop.append(props[ind]) + + return unique_mol, unique_prop + + umols, uprops = rmsd_cluster(mols, props, threshold=0.5) + for ind, (mol, (E, xyz)) in enumerate( + sorted(zip(umols, uprops), key=lambda i: i[1]) + ): + s = Structure.objects.get_or_create( + parent_ensemble=calc.result_ensemble, + number=ind + 1, + )[0] + s.degeneracy = 1 + s.xyz_structure = xyz + prop = get_or_create(calc.parameters, s) + prop.energy = E + prop.geom = True + s.save() + prop.save() + + calc_end = timezone.now() + + # Set the true start and end dates, including the conformational sampling and parsing + calc.date_started = calc_start + calc.date_finished = calc_end + calc.save() + + return ErrorCodes.SUCCESS + + def clean_struct_line(line): a, x, y, z = line.split() return f"{LOWERCASE_ATOMIC_SYMBOLS[a.lower()]} {x} {y} {z}\n" @@ -2747,13 +2891,18 @@ def calc_to_ccinput(calc): _nproc = calc.order.resource.pal _mem = calc.order.resource.memory + _step_name = calc.step.name + + if calc.parameters.software.lower() == "xtb" and ( + calc.step.name == "Fast Conformational Search" + ): + _step_name = "OPT" + # patch if calc.parameters.software.lower() == "nwchem" and ( calc.step.name == "MO Calculation" or calc.step.name == "ESP Calculation" ): _step_name = "SP" - else: - _step_name = calc.step.name params = { "software": calc.parameters.software, @@ -4108,6 +4257,7 @@ def get_Gaussian_xyz(text): "Single-Point Energy": xtb_sp, "Minimum Energy Path": mep, "Constrained Conformational Search": crest, + "Fast Conformational Search": fast_conf, }, "ORCA": { "NMR Prediction": orca_nmr, diff --git a/frontend/templates/frontend/batch_calculations.html b/frontend/templates/frontend/batch_calculations.html index 3956f9a8..acc629a7 100755 --- a/frontend/templates/frontend/batch_calculations.html +++ b/frontend/templates/frontend/batch_calculations.html @@ -245,9 +245,6 @@ }); }); - - - diff --git a/frontend/templates/frontend/form/availability.js b/frontend/templates/frontend/form/availability.js index 8d4af0b8..2d7fd8a1 100644 --- a/frontend/templates/frontend/form/availability.js +++ b/frontend/templates/frontend/form/availability.js @@ -46,6 +46,11 @@ var master_options = { "driver": ["xtb"], "theory_level": ["xtb"] }, + "Fast Conformational Search": { + "software": ["xtb"], + "driver": ["xtb"], // Not quite true, but good enough for now + "theory_level": ["xtb"] + }, "Constrained Conformational Search": { "software": ["xtb"], "driver": ["xtb"], @@ -70,7 +75,7 @@ var master_options = { "driver": ["ORCA", "Pysisyphus"] }, "xtb": { - "type": ["Geometrical Optimisation", "TS Optimisation", "Frequency Calculation", "Constrained Optimisation", "Single-Point Energy", "UV-Vis Calculation", "Conformational Search", "Constrained Conformational Search", "Minimum Energy Path"], + "type": ["Geometrical Optimisation", "TS Optimisation", "Frequency Calculation", "Constrained Optimisation", "Single-Point Energy", "UV-Vis Calculation", "Conformational Search", "Constrained Conformational Search", "Minimum Energy Path", "Fast Conformational Search"], "solvation_model": ["ALPB", "GBSA"], "solvation_radii": ["Default"], "theory_level": ["xtb"], @@ -152,17 +157,17 @@ var master_options = { } {% endif %} } -var list_available_elements = ["df", "pbeh3c", "hf3c", "se_method", "functional_field", "xtb_method", "basis_set_field", "custom_bs", "aux_file_structure", "aux_structure", "solvation_model", "solvation_radii", "constraints"]; +var list_available_elements = ["df", "pbeh3c", "hf3c", "se_method", "functional_field", "xtb_method", "basis_set_field", "custom_bs", "aux_file_structure", "aux_structure", "solvation_model", "solvation_radii", "constraints", "new_type_badge"]; var master_available = { "software": { "Gaussian": ["df", "se_method", "functional_field", "basis_set_field", "custom_bs", "solvation_model", "solvation_radii", "constraints"], "ORCA": ["hf3c", "pbeh3c", "se_method", "functional_field", "basis_set_field", "custom_bs", "solvation_model", "solvation_radii", "constraints", "aux_file_structure", "aux_structure"], - "xtb": ["aux_file_structure", "aux_structure", "solvation_model", "solvation_radii", "constraints", "xtb_method"], + "xtb": ["aux_file_structure", "aux_structure", "solvation_model", "solvation_radii", "constraints", "xtb_method", "new_type_badge"], "NWChem": ["functional_field", "basis_set_field", "solvation_model", "solvation_radii"], }, "theory_level": { - "xtb": ["aux_file_structure", "aux_structure", "solvation_model", "solvation_radii", "constraints", "xtb_method"], + "xtb": ["aux_file_structure", "aux_structure", "solvation_model", "solvation_radii", "constraints", "xtb_method", "new_type_badge"], "semiempirical": ["aux_file_structure", "aux_structure", "se_method", "solvation_model", "solvation_radii", "constraints"], "hf": ["aux_file_structure", "aux_structure", "hf3c", "basis_set_field", "custom_bs", "solvation_model", "solvation_radii", "constraints"], "dft": ["aux_file_structure", "aux_structure", "df", "pbeh3c", "functional_field", "basis_set_field", "custom_bs", "solvation_model", "solvation_radii", "constraints"] @@ -180,10 +185,11 @@ var master_available = { "MO Calculation": ["df", "pbeh3c", "hf3c", "functional_field", "basis_set_field", "custom_bs", "solvation_model", "solvation_radii"], "UV-Vis Calculation": ["df", "pbeh3c", "hf3c", "functional_field", "basis_set_field", "custom_bs", "solvation_model", "solvation_radii", "xtb_method"], "Conformational Search": ["solvation_model", "solvation_radii", "xtb_method"], + "Fast Conformational Search": ["solvation_model", "solvation_radii", "xtb_method", "new_type_badge"], "Constrained Conformational Search": ["solvation_model", "solvation_radii", "constraints", "xtb_method"] }, "solvent": { - "": ["df", "pbeh3c", "hf3c", "se_method", "functional_field", "basis_set_field", "custom_bs", "aux_file_structure", "aux_structure", "constraints", "xtb_method"] + "": ["df", "pbeh3c", "hf3c", "se_method", "functional_field", "basis_set_field", "custom_bs", "aux_file_structure", "aux_structure", "constraints", "xtb_method", "new_type_badge"] } } diff --git a/frontend/templates/frontend/form/type_body.html b/frontend/templates/frontend/form/type_body.html index 11d004a1..b54b05a4 100644 --- a/frontend/templates/frontend/form/type_body.html +++ b/frontend/templates/frontend/form/type_body.html @@ -7,6 +7,7 @@ {% endfor %} + diff --git a/requirements.txt b/requirements.txt index 6a41a711..9d892a12 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,10 @@ pre-commit psutil==5.7.2 psycopg2 pysisyphus +rdkit requests scipy +sklearn +spyrmsd wheel xkcdpass