From 242e3d0eec45b3a975d45ff6c34bf41545b5939d Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 29 Jun 2024 14:37:33 +0300 Subject: [PATCH 01/43] Minor: CI style modifications --- .github/workflows/cont_int.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cont_int.yml b/.github/workflows/cont_int.yml index 20d57c6f82..f233a2e9e9 100644 --- a/.github/workflows/cont_int.yml +++ b/.github/workflows/cont_int.yml @@ -11,7 +11,6 @@ on: schedule: - cron: '0 0 * * *' - jobs: build: runs-on: ubuntu-latest @@ -60,6 +59,7 @@ jobs: key: ${{ runner.os }}-rmgdb-main restore-keys: | ${{ runner.os }}-rmgdb- + - name: Checkout RMG-database if: steps.cache-rmg-db.outputs.cache-hit != 'true' uses: actions/checkout@v3 @@ -129,6 +129,7 @@ jobs: path: ~/conda_pkgs_dir key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }} + - name: Setup ARC Env uses: conda-incubator/setup-miniconda@v2 with: From 283dd124b7157c166fc278fcfac98877b15cb928 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 1 Jul 2024 05:55:12 +0300 Subject: [PATCH 02/43] Removed RMG-Py and RMG-database repo paths from common. They were used to report the git branch and commit of the repos --- arc/common.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arc/common.py b/arc/common.py index fa12084b62..0cf5e0764f 100644 --- a/arc/common.py +++ b/arc/common.py @@ -50,10 +50,6 @@ # Absolute path to the ARC folder. ARC_PATH = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) -# Absolute path to RMG-Py folder. -RMG_PATH = os.path.abspath(os.path.dirname(os.path.dirname(rmgpy.__file__))) -# Absolute path to RMG-database folder. 
-RMG_DATABASE_PATH = os.path.abspath(os.path.dirname(rmgpy.settings['database.directory'])) VERSION = '1.1.0' @@ -278,7 +274,7 @@ def log_header(project: str, logger.log(level, '###############################################################') logger.log(level, '') - paths_dict = {'ARC': ARC_PATH, 'RMG-Py': RMG_PATH, 'RMG-database': RMG_DATABASE_PATH} + paths_dict = {'ARC': ARC_PATH} for repo, path in paths_dict.items(): # Extract HEAD git commit. head, date = get_git_commit(path) From 6a95e5b0e2db29ed344f27dfe02c3c88ade5060e Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 29 Jun 2024 14:43:35 +0300 Subject: [PATCH 03/43] Relocated reaction into a sub module --- arc/__init__.py | 2 +- arc/reaction/__init__.py | 2 ++ arc/{ => reaction}/reaction.py | 0 arc/{ => reaction}/reaction_test.py | 4 ++-- 4 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 arc/reaction/__init__.py rename arc/{ => reaction}/reaction.py (100%) rename arc/{ => reaction}/reaction_test.py (99%) diff --git a/arc/__init__.py b/arc/__init__.py index 5707031fd4..28d80f6763 100644 --- a/arc/__init__.py +++ b/arc/__init__.py @@ -10,8 +10,8 @@ import arc.processor import arc.scheduler import arc.utils - import arc.job +import arc.reaction import arc.settings import arc.species import arc.statmech diff --git a/arc/reaction/__init__.py b/arc/reaction/__init__.py new file mode 100644 index 0000000000..e33a984d39 --- /dev/null +++ b/arc/reaction/__init__.py @@ -0,0 +1,2 @@ +import arc.reaction.family +from arc.reaction.reaction import ARCReaction diff --git a/arc/reaction.py b/arc/reaction/reaction.py similarity index 100% rename from arc/reaction.py rename to arc/reaction/reaction.py diff --git a/arc/reaction_test.py b/arc/reaction/reaction_test.py similarity index 99% rename from arc/reaction_test.py rename to arc/reaction/reaction_test.py index 5c3b85e7fc..e98621f2e0 100644 --- a/arc/reaction_test.py +++ b/arc/reaction/reaction_test.py @@ -2,7 +2,7 @@ # encoding: utf-8 """ 
-This module contains unit tests of the arc.reaction module +This module contains unit tests of the arc.reaction.reaction module """ from itertools import permutations @@ -17,7 +17,7 @@ from arc.common import ARC_PATH, almost_equal_lists, read_yaml_file from arc.exceptions import ReactionError from arc.main import ARC -from arc.reaction import ARCReaction, remove_dup_species +from arc.reaction.reaction import ARCReaction, remove_dup_species from arc.scheduler import Scheduler from arc.species import ARCSpecies from arc.mapping.engine import check_atom_map, label_species_atoms From f4fdf54b7282cadb39ff32ed1893bcc32ef6f93c Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Tue, 2 Jul 2024 19:38:07 +0300 Subject: [PATCH 04/43] Added RMG_DB_PATH to settings --- arc/settings/settings.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arc/settings/settings.py b/arc/settings/settings.py index 72946e303a..a8f62aac1f 100644 --- a/arc/settings/settings.py +++ b/arc/settings/settings.py @@ -292,11 +292,12 @@ LOWEST_MAJOR_TS_FREQ, HIGHEST_MAJOR_TS_FREQ = 75.0, 10000.0 # default environment names for sister repos -TS_GCN_PYTHON, TANI_PYTHON, AUTOTST_PYTHON, ARC_PYTHON, XTB, OB_PYTHON = None, None, None, None, None, None +TS_GCN_PYTHON, TANI_PYTHON, AUTOTST_PYTHON, ARC_PYTHON, XTB, OB_PYTHON, RMG_DB_PATH = \ + None, None, None, None, None, None, None home = os.getenv("HOME") or os.path.expanduser("~") tani_pypath_1 = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(sys.executable))), - 'tani_env', 'bin', 'python') + 'tani_env', 'bin', 'python') tani_pypath_2 = os.path.join(home, 'mambaforge', 'envs', 'tani_env', 'bin', 'python') tani_pypath_3 = os.path.join(home, 'anaconda3', 'envs', 'tani_env', 'bin', 'python') tani_pypath_4 = os.path.join(home, 'miniconda3', 'envs', 'tani_env', 'bin', 'python') @@ -307,7 +308,7 @@ TANI_PYTHON = tani_pypath ob_pypath_1 = 
os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(sys.executable))), - 'ob_env', 'bin', 'python') + 'ob_env', 'bin', 'python') ob_pypath_2 = os.path.join(home, 'mambaforge', 'envs', 'ob_env', 'bin', 'python') ob_pypath_3 = os.path.join(home, 'anaconda3', 'envs', 'ob_env', 'bin', 'python') ob_pypath_4 = os.path.join(home, 'miniconda3', 'envs', 'ob_env', 'bin', 'python') @@ -342,7 +343,7 @@ paths = list() paths.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(sys.executable))), - 'xtb_env', 'bin', 'xtb')) + 'xtb_env', 'bin', 'xtb')) paths.append(os.path.join(home, 'anaconda3', 'envs', 'xtb_env', 'bin', 'xtb')) paths.append(os.path.join(home, 'miniconda3', 'envs', 'xtb_env', 'bin', 'xtb')) paths.append(os.path.join(home, '.conda', 'envs', 'xtb_env', 'bin', 'xtb')) @@ -362,3 +363,20 @@ if os.path.isfile(arc_pypath): ARC_PYTHON = arc_pypath break + +rmg_db_path_1 = None +for python_path in sys.path: + if 'RMG-database' in python_path: + rmg_db_path_1 = python_path + break +rmg_db_path_2 = None +for python_path in sys.path: + if 'RMG-Py' in python_path: + rmg_db_path_2 = os.path.join(os.path.dirname(python_path), 'RMG-database') + break +rmg_db_path_3 = os.path.join(home, 'Code', 'RMG-database') +rmg_db_path_4 = os.path.join(home, 'runner', 'work', 'ARC', 'ARC', 'RMG-database') +for rmg_db_path in [rmg_db_path_1, rmg_db_path_2, rmg_db_path_3, rmg_db_path_4]: + if rmg_db_path is not None and os.path.isdir(rmg_db_path): + RMG_DB_PATH = rmg_db_path + break From e005c511ca2d127b3b7aa80a810fd1b4c2484ba1 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 29 Jun 2024 16:34:00 +0300 Subject: [PATCH 05/43] Added clean_text() to common --- arc/common.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arc/common.py b/arc/common.py index 0cf5e0764f..dadd413bdb 100644 --- a/arc/common.py +++ b/arc/common.py @@ -1058,6 +1058,24 @@ def is_str_int(value: Optional[str]) -> bool: return False +def clean_text(text: str) -> str: 
+ """ + Clean a text string from leading and trailing whitespaces, newline characters, and double quotes. + + Args: + text (str): The text to clean. + + Returns: + str: The cleaned text. + """ + text = text.strip() + text = text.lstrip('\n').rstrip('\n') + text = text.replace('"', '') + text = text.rstrip(',') + text = text.lstrip('\n').rstrip('\n') + return text + + def time_lapse(t0) -> str: """ A helper function returning the elapsed time since t0. From 9036d5cf94c7e2f0fc900c53ec6239e7e1263e2c Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 29 Jun 2024 16:34:13 +0300 Subject: [PATCH 06/43] Tests: common.clean_text() --- arc/common_test.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arc/common_test.py b/arc/common_test.py index 26d4e997f1..4287b0fb8b 100644 --- a/arc/common_test.py +++ b/arc/common_test.py @@ -588,6 +588,19 @@ def test_is_str_int(self): self.assertFalse(common.is_str_int('125.84')) self.assertFalse(common.is_str_int('0.0')) + def test_clean_text(self): + """Test the clean_text() function""" + self.assertEqual(common.clean_text('R1'), 'R1') + self.assertEqual(common.clean_text(' D_3_5_7_4"\n'), 'D_3_5_7_4') + self.assertEqual(common.clean_text('"OR{Cd_Cdd, Cdd_Cd, Cd_Cd, Sd_Cd, N1dc_N5ddc, N3d_Cd}",\n '), + 'OR{Cd_Cdd, Cdd_Cd, Cd_Cd, Sd_Cd, N1dc_N5ddc, N3d_Cd}') + self.assertEqual(common.clean_text('\n"""\n1 *1 Cd u0 {2,D} {3,S} {4,S}\n2 *2 Cdd u0 {1,D} {5,D}\n3 H u0 {1,S}\n4 H u0 {1,S}\n5 [O2d,S2d] u0 {2,D}\n""",\n '), + """1 *1 Cd u0 {2,D} {3,S} {4,S} +2 *2 Cdd u0 {1,D} {5,D} +3 H u0 {1,S} +4 H u0 {1,S} +5 [O2d,S2d] u0 {2,D}""") + def test_get_atom_radius(self): """Test determining the covalent radius of an atom""" self.assertEqual(common.get_atom_radius('C'), 0.76) From b40a4cb37a21e9f500c4b412d001f7f80afadbe4 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Tue, 2 Jul 2024 12:10:43 +0300 Subject: [PATCH 07/43] Added hydrolysis as an ARC reaction family ARC reaction families are families for which only a 
template/recipe is given without kinetic data. They can be used by ARC to atom-map a reaction and search for a transition state just like RMG families. They have the same format as the `groups.py` file in RMG reaction families, only here the files are named by the family label. ARC families should be located under ARC/data/families/ to be discovered by ARC. Also added ARC_FAMILIES_PATH to settings --- arc/settings/settings.py | 3 +++ data/families/hydrolysis.py | 54 +++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 data/families/hydrolysis.py diff --git a/arc/settings/settings.py b/arc/settings/settings.py index a8f62aac1f..bd67079741 100644 --- a/arc/settings/settings.py +++ b/arc/settings/settings.py @@ -291,6 +291,9 @@ # An imaginary frequency is valid if it is between the following range (in cm-1): LOWEST_MAJOR_TS_FREQ, HIGHEST_MAJOR_TS_FREQ = 75.0, 10000.0 +# ARC families folder path +ARC_FAMILIES_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', 'families') + # default environment names for sister repos TS_GCN_PYTHON, TANI_PYTHON, AUTOTST_PYTHON, ARC_PYTHON, XTB, OB_PYTHON, RMG_DB_PATH = \ None, None, None, None, None, None, None diff --git a/data/families/hydrolysis.py b/data/families/hydrolysis.py new file mode 100644 index 0000000000..853d088c00 --- /dev/null +++ b/data/families/hydrolysis.py @@ -0,0 +1,54 @@ +name = "hydrolysis/groups" +shortDesc = u"hydrolysis" +longDesc = u""" +A generic bimolecular hydrolysis reaction: AB + H2O <=> AH + BOH + +R1(*1)[O/N](*3)R2(*2) + H(*4)O(*5)H <=> R1(*1)[O/N](*3)H(*4) + R2(*2)O(*5)H + +""" + +template(reactants=["R1ONR2", "H2O"], products=["R1ONH", "R2OH"], ownReverse=False) + +reverse = "condensation" + +reversible = True + +recipe(actions=[ + ['BREAK_BOND', '*3', 1, '*2'], + ['BREAK_BOND', '*4', 1, '*5'], + ['FORM_BOND', '*3', 1, '*4'], + ['FORM_BOND', '*2', 1, '*5'], +]) + +entry( + index = 0, + label = "R1ONR2", + group = 
group = +""" +1 *1 R!H u0 p0 c0 {2,S} +2 *3 [O2s,N3s] u0 p[1,2] c0 {1,S} {3,S} +3 *2 R!H u0 p0 c0 {2,S} +""", + kinetics = None, +) + +entry( + index = 1, + label = "H2O", + group = +""" +1 *4 O u0 p2 c0 {2,S} {3,S} +2 *5 H u0 p0 c0 {1,S} +3 H u0 p0 c0 {1,S} +""", + kinetics = None, +) + + +tree( +""" +L1: R1ONR2 +L1: H2O +""" +) + From cbdfaedceb4c266fa2cb2bacd2add46841670849 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Tue, 2 Jul 2024 09:51:39 +0300 Subject: [PATCH 08/43] Added the reaction family module --- arc/reaction/family.py | 775 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 775 insertions(+) create mode 100644 arc/reaction/family.py diff --git a/arc/reaction/family.py b/arc/reaction/family.py new file mode 100644 index 0000000000..b078c0852f --- /dev/null +++ b/arc/reaction/family.py @@ -0,0 +1,775 @@ +""" +A module for working with RMG reaction families. +""" + +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union +import ast +import os +import re + +from rmgpy.molecule import Bond, Group, Molecule + +from arc.common import clean_text, get_logger +from arc.imports import settings + +if TYPE_CHECKING: + from arc.species import ARCSpecies + from arc.reaction.reaction import ARCReaction + +RMG_DB_PATH = settings['RMG_DB_PATH'] +ARC_FAMILIES_PATH = settings['ARC_FAMILIES_PATH'] + + +logger = get_logger() + + +class ReactionFamily(object): + """ + A class for representing a reaction family. + + Args: + label (str): The reaction family label. + consider_arc_families (bool, optional): Whether to consider ARC's custom families + when searching for the family groups file. + + Attributes: + label (str): The reaction family label. 
+ """ + + def __init__(self, + label: str, + consider_arc_families: bool = True, + ): + self.label = label + self.groups_as_lines = self.get_groups_file_as_lines(consider_arc_families=consider_arc_families) + self.reversible = is_reversible(self.groups_as_lines) + self.own_reverse = is_own_reverse(self.groups_as_lines) + self.reactants = get_reactant_groups_from_template(self.groups_as_lines) + self.reactant_num = self.get_reactant_num() + self.product_num = get_product_num(self.groups_as_lines) + entry_labels = list() + for reactant_group in self.reactants: + entry_labels.extend(reactant_group) + self.entries = get_entries(self.groups_as_lines, entry_labels=entry_labels) + self.actions = get_recipe_actions(self.groups_as_lines) + + def __str__(self): + """ + A string representation of the object. + """ + return f'ReactionFamily(label={self.label})' + + def get_groups_file_as_lines(self, consider_arc_families: bool = True) -> List[str]: + """ + Get the groups file as a list of lines. + Precedence is given to RMG families (ARC families should therefore have distinct names than RMG's) + + Args: + consider_arc_families (bool, optional): Whether to consider ARC's custom families. + + Returns: + List[str]: The groups file as a list of lines. + """ + groups_path = os.path.join(RMG_DB_PATH, 'input', 'kinetics', 'families', self.label, 'groups.py') + if not os.path.isfile(groups_path): + if consider_arc_families: + groups_path = os.path.join(ARC_FAMILIES_PATH, f'{self.label}.py') + if not os.path.isfile(groups_path): + raise FileNotFoundError(f'Could not find the groups file for family {self.label}') + with open(groups_path, 'r') as f: + groups_as_lines = f.readlines() + return groups_as_lines + + def generate_products(self, + reactants: List['ARCSpecies'], + ) -> Dict[Union[str, Tuple[str, str]], List[List['Molecule']]]: + """ + Generate a list of all the possible reaction products of this family starting from the list of ``reactants``. 
+ + reactant_to_group_maps has the following structure:: + + {0: [{'group': 1, 'subgroup':