Skip to content

Commit

Permalink
add OH,CH3 subs; avoid duplicate substituents; add to tests
Browse files Browse the repository at this point in the history
  • Loading branch information
lpratalimaffei committed Dec 27, 2024
1 parent 825efd2 commit 3f35483
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 13 deletions.
25 changes: 18 additions & 7 deletions automol/graph/_3super_func_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def classify_species(gra):
"C5,OH-M",
"A1,OH-M",
"A1,OH,OH-M",
"A1,OH,CH3-M",
"A1,OH,CHO-M",
"A1,OH,OCH3-M",
"A1,CHO-M",
Expand Down Expand Up @@ -111,28 +112,36 @@ def assign_grps(self, gra):
# assign base groups
for key, fct in BASE_GRP_DCT.items():
self.sup_grps[key] = self.grp_fct_dct[fct]

base_grps_0 = list(itertools.chain(*[grp for grp in self.sup_grps.values() if len(grp) > 0]))
# assign substituents
subs_fct_dct = {}
for key, fct in SUBSTITUENTS_GRP_DCT.items():
subs_fct_dct[key] = self.grp_fct_dct[fct]

# CHECK C6H5C2H2, C6H5C2H4!!
# assign composite
heavy_atms = list(implicit(gra)[0].keys())
for comp_grp in COMPOSITE_GRP_LIST:
base_and_subs, base_type = comp_grp.split("-")
base, subs = (
base_and_subs.split(",")[0] + "-" + base_type,
base_and_subs.split(",")[1:],
)

base_grps = self.sup_grps[base] # base groups to search substituents in
for sub in subs:
sub_grps = subs_fct_dct[sub]
# intersection becomes the new base_grps;
sub_grps_eff = ()

# if the atoms of the substituent are part of (any) base group: skip
for grp in sub_grps:
if not any(all(atm in basei for atm in grp if atm in heavy_atms) for basei in base_grps_0):
sub_grps_eff += (grp,)
# intersection base+sub becomes the new base_grps;
# filter by bond type, e.g., C-C, C-O..
# with bonded_grps only: fails for OCH3
# (CH2-O bonded to an aromatic would work too)
base_grps = bonded_grps_checksymb(gra, base_grps, sub_grps, "C", sub[0])
base_grps = bonded_grps_checksymb(gra, base_grps, sub_grps_eff, "C", sub[0])
# add to dct
self.sup_grps[comp_grp] = base_grps

Expand Down Expand Up @@ -179,6 +188,7 @@ def bonded_grps_checksymb(gra, grps1, grps2, symb1, symb2):
heavy_atms = list(implicit(gra)[0].keys())
correct_bonds = bonds_of_type(gra, symb1, symb2)
grps = ()
assigned_grps2 = () # make sure that each substituent is assigned to one group only
if len(grps1) > 0 and len(grps2) > 0 and len(correct_bonds) > 0:
for grp1 in grps1:
# keep only heavy atoms
Expand All @@ -187,13 +197,14 @@ def bonded_grps_checksymb(gra, grps1, grps2, symb1, symb2):
grp2 = tuple(
atm for atm in grp2 if atm in heavy_atms and atm not in grp1
)
possible_bonds = list(itertools.product(grp1, grp2))
effective_bonds = (
possible_bonds = list(itertools.product(grp1, grp2)) + list(itertools.product(grp2, grp1))
effective_bonds = list(
bond for bond in possible_bonds if frozenset(bond) in gra[1].keys()
)
if len(tuple(set(effective_bonds).intersection(correct_bonds))) > 0:
grp = grp1 + grp2
if sorted(grp) not in [sorted(grpi) for grpi in grps]:
if sorted(grp) not in [sorted(grpi) for grpi in grps] and grp2 not in assigned_grps2:
grps += (grp,)

assigned_grps2 += (grp2,)

return grps
26 changes: 20 additions & 6 deletions automol/tests/test_super_func_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,34 +37,42 @@
automol.smiles.chi(smi)))

SPCS_GRPS = {
'BZFUR': {'FUR-M': ((4, 5, 8, 7, 6),) ,
'A1-M': ((0, 1, 3, 7, 6, 2),) ,},
'AMN': {'A1-M': ((5, 6, 9, 10, 7, 8), (1, 2, 4, 10, 9, 3)) ,
'A1,CH3-M': ((5, 6, 9, 10, 7, 8, 0),)},
'C6H5': {'A1-R': ((0, 1, 3, 5, 4, 2),)},
'INDENYL': {
'C5-RSR': ((2, 5, 7, 8, 6),) ,
},
'C12H8': {
'A1-M': ((0, 2, 8, 11, 9, 4), (1, 3, 8, 11, 10, 5)) ,
'A1,C2H3-M': ((0, 2, 8, 11, 9, 4, 6, 7),),
},
'C9H7O': {'C5O-RSR': ((4, 5, 8, 7, 6, 9),) ,},
'C5H4O': {'C5O-M': ((0, 1, 3, 4, 2, 5),) ,},
'OC6H4CH3': {'A1O-RSR': ((1, 2, 4, 6, 5, 3, 7),) ,
},
'HOC6H4CH3': {
'A1-M': ((1, 2, 5, 4, 6, 3),) ,
'A1,OH-M': ((1, 2, 5, 4, 6, 3, 7),)
}, # more entries still to be added
'A1,CH3-M': ((1, 2, 5, 4, 6, 3, 0),),
'A1,OH-M': ((1, 2, 5, 4, 6, 3, 7),),
'A1,OH,CH3-M': ((1, 2, 5, 4, 6, 3, 7, 0),),
},
'C6H5CH2OOH': {'A1-M': ((0, 1, 3, 6, 4, 2),) ,
},
'BZFUR': {'FUR-M': ((4, 5, 8, 7, 6),) ,
'A1-M': ((0, 1, 3, 7, 6, 2),) ,},
'C10H7CH2': {'A1CH2-RSR': ((3, 4, 8, 10, 9, 6, 0),) ,},
'C6H5C2H3': {'A1-M': ((2, 3, 5, 7, 6, 4),) ,
'A1,C2H3-M': ((2, 3, 5, 7, 6, 4, 0, 1),),
},
'C10H9': {'A1CH2-RSR': ((0, 1, 5, 9, 8, 4, 7), (0, 1, 5, 9, 8, 4, 6)) ,
},
'CH3C6H4': {'A1-R': ((1, 2, 4, 6, 5, 3),) ,
'A1,CH3-R': ((1, 2, 4, 6, 5, 3, 0),),
},
'C6H5CCC6H5': {
'A1-M': ((1, 4, 8, 13, 9, 5), (0, 2, 6, 12, 7, 3)) ,
'A1,C2H-M': ((1, 4, 8, 13, 9, 5, 10, 11),),
},
'C6H5C3H3-A': {'A1-M': ((2, 3, 6, 8, 7, 4),) ,
'A1,C3.DD-M': ((2, 3, 6, 8, 7, 4, 0, 1, 5),)},
Expand All @@ -73,6 +81,7 @@
},
'CYC5H7': {'C5H2-RSR': ((0, 1, 3, 4, 2),) ,},
'MEINDENYL': {'C5-RSR': ((5, 6, 8, 9, 7),) ,
'C5,CH3-RSR': ((5, 6, 8, 9, 7, 0),)
},
'BENZOFLUORENE': {'C5-M': ((10, 12, 15, 16, 13),) ,
'A1-M': ((0, 2, 6, 14, 11, 4), (8, 9, 13, 16, 14, 11), (1, 3, 7, 15, 12, 5)) ,
Expand All @@ -85,17 +94,22 @@
'SALICALD': {
'A1-M': ((0, 1, 3, 6, 5, 2),) ,
'A1,OH-M': ((0, 1, 3, 6, 5, 2, 8),) ,
'A1,CHO-M': ((0, 1, 3, 6, 5, 2, 4, 7),),
'A1,OH,CHO-M': ((0, 1, 3, 6, 5, 2, 8, 4, 7),),
},
}

def test_super_functional_group_dct():

for SPC, DCT in SPCS_GRPS.items():
print(SPC)
gra = SPCS_CHECKS_SMI[SPC]
fgrps = automol.graph.SuperFunctionalGroup()
fgrps.assign_grps(gra)
for key, val in DCT.items():
assert val == fgrps.sup_grps[key]
for key, val in fgrps.sup_grps.items():
if len(val) > 0:
assert val == DCT[key]


if __name__ == '__main__':
test_super_functional_group_dct()

0 comments on commit 3f35483

Please sign in to comment.