From 80b4d57a1b212441e8a6f56dd49a5f68bd911818 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 7 Dec 2024 20:59:12 +0200 Subject: [PATCH 1/4] Extended hardcoded triplet [O][O] perception to [S][S] as well --- arc/species/converter.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arc/species/converter.py b/arc/species/converter.py index 50718e9487..a813381f38 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -1380,12 +1380,14 @@ def molecules_from_xyz(xyz: Optional[Union[dict, str]], return None, None xyz = check_xyz_dict(xyz) - if xyz['symbols'] == ('O', 'O') and multiplicity != 1: - coords = np.asarray(xyz['coords'], dtype=np.float32) - vector = coords[0] - coords[1] - if float(np.dot(vector, vector) ** 0.5) < 1.4: - # Special case for O2 triplet - return Molecule(smiles='[O][O]'), Molecule(smiles='[O][O]') + if len(xyz['symbols']) == 2: + for element, bond_length in zip(['O', 'S'], [1.4, 2.1]): + if xyz['symbols'] == (element, element) and multiplicity != 1: + coords = np.asarray(xyz['coords'], dtype=np.float32) + vector = coords[0] - coords[1] + if float(np.dot(vector, vector) ** 0.5) < bond_length: + # Special case for O2 and S2 triplet + return Molecule(smiles=f'[{element}][{element}]'), Molecule(smiles=f'[{element}][{element}]') # 1. Generate a molecule with no bond order information with atoms ordered as in xyz. mol_graph = MolGraph(symbols=xyz['symbols'], coords=xyz['coords']) From eaa5d912b6fd65ef624a26ec43863086d9701968 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 7 Dec 2024 20:59:38 +0200 Subject: [PATCH 2/4] Tests: Triplet [S][S] perception --- arc/species/converter_test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arc/species/converter_test.py b/arc/species/converter_test.py index 9b302e7272..d4e6b4c4f3 100644 --- a/arc/species/converter_test.py +++ b/arc/species/converter_test.py @@ -3588,8 +3588,10 @@ def test_xyz_to_smiles(self): mol18 = converter.molecules_from_xyz(converter.str_to_xyz(xyz18))[1] mol19 = converter.molecules_from_xyz(converter.str_to_xyz(xyz19))[1] mol20 = converter.molecules_from_xyz(converter.str_to_xyz(xyz20))[1] - mol21 = converter.molecules_from_xyz("""O 0.0000000 0.0000000 0.6076340 - O 0.0000000 0.0000000 -0.6076340""")[1] + mol21 = converter.molecules_from_xyz("""O 0.0000000 0.0000000 0.6076340 + O 0.0000000 0.0000000 -0.6076340""")[1] + mol22 = converter.molecules_from_xyz("""S 0.0000000 0.0000000 0.9547220 + S 0.0000000 0.0000000 -0.9547220""")[1] self.assertEqual(mol1.to_smiles(), '[NH-][S+](=O)(O)C') self.assertIn(mol2.to_smiles(), ['COC1=C(CO)C=C([C](C)C)C=C1', 'COC1C=CC(=CC=1CO)[C](C)C']) @@ -3622,6 +3624,8 @@ def test_xyz_to_smiles(self): self.assertEqual(mol20.to_smiles(), 'C=C[CH]C=CC') self.assertEqual(mol21.to_smiles(), '[O][O]') self.assertEqual(mol21.multiplicity, 3) + self.assertEqual(mol22.to_smiles(), '[S][S]') + self.assertEqual(mol22.multiplicity, 3) def test_to_rdkit_mol(self): """Test converting an RMG Molecule object to an RDKit Molecule object""" From e25b80eae243ea2949beb9f6a83ea5a3737277e2 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 7 Dec 2024 21:04:13 +0200 Subject: [PATCH 3/4] Removed triplet O2 and S2 hardcoding from species mol_from_xyz() --- arc/species/species.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arc/species/species.py b/arc/species/species.py index 7c0df39d7c..714a1ae666 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -1583,11 +1583,6 @@ def mol_from_xyz(self, if xyz is None: return None - if len(xyz['symbols']) == 2 and xyz['symbols'][0] == xyz['symbols'][1] \ - and xyz['symbols'][0] in ['O', 'S'] and self.multiplicity == 3: - # Hard-coded for triplet O2 and S2: Don't perceive mol. - return None - if self.mol is not None: if len(self.mol.atoms) != len(xyz['symbols']): raise SpeciesError(f'The number of atoms in the molecule and in the coordinates of {self.label} is different.' From 92bbc312ba2e62cf37e9cc9275af1441cc6bdcb9 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 7 Dec 2024 21:04:31 +0200 Subject: [PATCH 4/4] Tests: Triplet S2 and O2 perception in species mol_from_xyz() --- arc/species/species_test.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arc/species/species_test.py b/arc/species/species_test.py index 7a0b1a5658..2813e840d4 100644 --- a/arc/species/species_test.py +++ b/arc/species/species_test.py @@ -1686,6 +1686,24 @@ def test_mol_from_xyz(self): so2.mol_from_xyz(xyz=so2_t_xyz, get_cheap=False) self.assertEqual([atom.element.symbol for atom in so2.mol.atoms], ['S', 'O', 'O']) + # S2 + s2_xyz = """S 0.0000000 0.0000000 0.9547220 + S 0.0000000 0.0000000 -0.9547220""" + s2 = ARCSpecies(label='S2', smiles='[S][S]') + self.assertEqual(s2.multiplicity, 3) + self.assertEqual(s2.charge, 0) + s2.mol_from_xyz(xyz=str_to_xyz(s2_xyz), get_cheap=False) + self.assertEqual([atom.element.symbol for atom in s2.mol.atoms], ['S', 'S']) + self.assertEqual(s2.mol.to_smiles(), '[S][S]') + self.assertEqual(s2.mol.multiplicity, 3) + + # O2 + o2_xyz = {'symbols': ('O', 'O'), 'isotopes': (16, 16), 'coords': ((0.0, 0.0, 0.6029), (0.0, 0.0, -0.6029))} + o2 = ARCSpecies(label='O2aa', xyz=o2_xyz, multiplicity=3) + self.assertEqual(o2.multiplicity, 3) + self.assertEqual(o2.mol.multiplicity, 3) + self.assertEqual(o2.mol.to_smiles(), '[O][O]') + def test_consistent_atom_order(self): """Test that the atom order is preserved whether starting from SMILES or from xyz""" xyz9 = """O -1.17310019 -0.30822930 0.16269772