From 81e9cb23868c7d8e6fe391b7809acfedae306661 Mon Sep 17 00:00:00 2001 From: Gareth Jones Date: Sat, 28 Oct 2023 13:47:58 +0300 Subject: [PATCH] Fix stereo bond corruption on RGD. (#6832) --- .../ChemTransforms/ChemTransforms.cpp | 54 +++++--- .../RGroupDecomposition/RGroupCore.cpp | 18 ++- .../RGroupDecomposition/testRGroupDecomp.cpp | 124 +++++++++++++++++- .../RGroupDecompositionTest.cs | 75 +++++++++-- 4 files changed, 240 insertions(+), 31 deletions(-) diff --git a/Code/GraphMol/ChemTransforms/ChemTransforms.cpp b/Code/GraphMol/ChemTransforms/ChemTransforms.cpp index 770819ade36..8053d6c640d 100644 --- a/Code/GraphMol/ChemTransforms/ChemTransforms.cpp +++ b/Code/GraphMol/ChemTransforms/ChemTransforms.cpp @@ -196,10 +196,9 @@ std::vector replaceSubstructs( INT_VECT delList; // now loop over the list of matches and replace them: - for (std::vector::const_iterator mati = fgpMatches.begin(); - mati != fgpMatches.end(); mati++) { + for (const auto& fgpMatche : fgpMatches) { INT_VECT match; // each match onto the molecule - list of atoms ids - for (const auto &mi : *mati) { + for (const auto &mi : fgpMatche) { match.push_back(mi.second); } @@ -290,24 +289,22 @@ ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery, } boost::dynamic_bitset<> matchingIndices(mol.getNumAtoms()); - for (MatchVectType::const_iterator mvit = matchV.begin(); - mvit != matchV.end(); ++mvit) { - matchingIndices[mvit->second] = 1; + for (auto mvit : matchV) { + matchingIndices[mvit.second] = 1; } auto *newMol = new RWMol(mol); boost::dynamic_bitset<> keepSet(newMol->getNumAtoms()); std::vector dummyIndices; - for (MatchVectType::const_iterator mvit = matchV.begin(); - mvit != matchV.end(); ++mvit) { - keepSet.set(mvit->second); + for (auto mvit : matchV) { + keepSet.set(mvit.second); // if the atom in the molecule has higher degree than the atom in the // core, we have an attachment point: - if (newMol->getAtomWithIdx(mvit->second)->getDegree() > - coreQuery.getAtomWithIdx(mvit->first)->getDegree()) { + if (newMol->getAtomWithIdx(mvit.second)->getDegree() > + coreQuery.getAtomWithIdx(mvit.first)->getDegree()) { ROMol::ADJ_ITER nbrIdx, endNbrs; boost::tie(nbrIdx, endNbrs) = - newMol->getAtomNeighbors(newMol->getAtomWithIdx(mvit->second)); + newMol->getAtomNeighbors(newMol->getAtomWithIdx(mvit.second)); while (nbrIdx != endNbrs) { if (!matchingIndices[*nbrIdx]) { // this neighbor isn't in the match, convert it to a dummy atom and @@ -315,7 +312,7 @@ ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery, keepSet.set(*nbrIdx); dummyIndices.push_back(*nbrIdx); Atom *at = newMol->getAtomWithIdx(*nbrIdx); - Bond *b = newMol->getBondBetweenAtoms(mvit->second, *nbrIdx); + Bond *b = newMol->getBondBetweenAtoms(mvit.second, *nbrIdx); if (b) { b->setIsAromatic(false); b->setBondType(Bond::SINGLE); @@ -554,9 +551,7 @@ ROMol *replaceCore(const ROMol &mol, const ROMol &core, } unsigned int whichNbr = 0; std::list newBonds; - for (std::list::const_iterator lIter = nbrList.begin(); - lIter != nbrList.end(); ++lIter) { - unsigned int nbrIdx = *lIter; + for (unsigned int nbrIdx : nbrList) { Bond *connectingBond = newMol->getBondBetweenAtoms(mappingInfo.molIndex, nbrIdx); bool bondToCore = matchingIndices[nbrIdx] > -1; @@ -607,6 +602,10 @@ ROMol *replaceCore(const ROMol &mol, const ROMol &core, dummyAtomMap[newAt] = nbrIdx; keepList.push_back(newAt); Bond *bnd = connectingBond->copy(); + // If the connecting bond has stereo settings those cannot be preserved + if (bnd->getStereo() > Bond::STEREOANY) { + bnd->setStereo(Bond::STEREOANY); + } if (bnd->getBeginAtomIdx() == static_cast(mappingInfo.molIndex)) { bnd->setEndAtomIdx(newAt->getIdx()); @@ -616,6 +615,23 @@ ROMol *replaceCore(const ROMol &mol, const ROMol &core, newBonds.push_back(bnd); allNewBonds.push_back(bnd); + // Check to see if we are breaking a stereo bond definition, by removing one of the stereo atoms + // If so, set to the new atom + for (const auto bond : newMol->atomBonds(sidechainAtom)) { + if (bond->getIdx() == connectingBond->getIdx()) { + continue; + } + + if (bond->getStereo() > Bond::STEREOANY) { + auto &stereoAtoms = bond->getStereoAtoms(); + for (int& stereoAtom : stereoAtoms) { + if (stereoAtom == static_cast(nbrIdx)) { + stereoAtom = static_cast(newAt->getIdx()); + } + } + } + } + // we may be changing the bond ordering at the atom. // e.g. replacing the N in C[C@](Cl)(N)F gives an atom ordering of // C[C?](Cl)(F)[X] @@ -731,9 +747,9 @@ ROMol *replaceCore(const ROMol &mol, const ROMol &core, for (auto citer = mol.beginConformers(); citer != mol.endConformers(); ++citer) { Conformer &newConf = newMol->getConformer((*citer)->getId()); - for (auto iter = dummyAtomMap.begin(); iter != dummyAtomMap.end(); ++iter) { - newConf.setAtomPos(iter->first->getIdx(), - (*citer)->getAtomPos(iter->second)); + for (auto& iter : dummyAtomMap) { + newConf.setAtomPos(iter.first->getIdx(), + (*citer)->getAtomPos(iter.second)); } } diff --git a/Code/GraphMol/RGroupDecomposition/RGroupCore.cpp b/Code/GraphMol/RGroupDecomposition/RGroupCore.cpp index f2faf6d52bc..5dc5b076aa5 100644 --- a/Code/GraphMol/RGroupDecomposition/RGroupCore.cpp +++ b/Code/GraphMol/RGroupDecomposition/RGroupCore.cpp @@ -152,7 +152,6 @@ RWMOL_SPTR RCore::extractCoreFromMolMatch( ->getBondBetweenAtoms(pair.second, targetNeighborIndex) ->copy(); if (connectingBond->getStereo() > Bond::BondStereo::STEREOANY) { - // TODO: how to handle bond stereo on rgroups connected to core by // stereo double bonds connectingBond->setStereo(Bond::BondStereo::STEREOANY); } @@ -164,6 +163,23 @@ RWMOL_SPTR RCore::extractCoreFromMolMatch( } newBonds.push_back(connectingBond); + // Check to see if we are breaking a stereo bond definition, by removing one of the stereo atoms + // If so, set to the new atom + for (auto bond: extractedCore->atomBonds(targetAtom)) { + if (bond->getIdx() == connectingBond->getIdx()) { + continue; + } + + if (bond->getStereo() > Bond::STEREOANY) { + auto &stereoAtoms = bond->getStereoAtoms(); + for (int& stereoAtom : stereoAtoms) { + if (stereoAtom == static_cast(targetNeighborIndex)) { + stereoAtom = static_cast(newDummyIdx); + } + } + } + } + // Chirality parity stuff see RDKit::replaceCore in // Code/GraphMol/ChemTransforms/ChemTransforms.cpp if (isChiral) { diff --git a/Code/GraphMol/RGroupDecomposition/testRGroupDecomp.cpp b/Code/GraphMol/RGroupDecomposition/testRGroupDecomp.cpp index dd6e5ed7b6f..8e4deb90515 100644 --- a/Code/GraphMol/RGroupDecomposition/testRGroupDecomp.cpp +++ b/Code/GraphMol/RGroupDecomposition/testRGroupDecomp.cpp @@ -3926,7 +3926,7 @@ void testTautomerCore() { CHECK_RGROUP(it, expected2[i]); } - auto core3 = R"CTAB(" + auto core3 = R"CTAB( Mrv2008 08072313382D 9 9 0 0 0 0 999 V2000 @@ -3967,6 +3967,127 @@ M END } } +void testStereoBondBug() { + BOOST_LOG(rdInfoLog) + << "********************************************************\n"; + BOOST_LOG(rdInfoLog) << "Test that stereo bonds adjacent to the core or attachment atoms are handled correctly" + << std::endl; + + const auto core = R"CTAB(ACS Document 1996 + ChemDraw10242316092D + + 6 6 0 0 0 0 0 0 0 0999 V2000 + -0.7145 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7145 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 + 2 3 1 0 + 3 4 2 0 + 4 5 1 0 + 5 6 2 0 + 6 1 1 0 +M END +)CTAB"_ctab; + const auto mol = "C/C=C/C1=CC=CC=C1"_smiles; + RGroupDecompositionParameters params; + params.matchingStrategy = GreedyChunks; + params.allowMultipleRGroupsOnUnlabelled = true; + params.onlyMatchAtRGroups = false; + params.doEnumeration = false; + RGroupDecomposition decomp(*core, params); + const auto add1 = decomp.add(*mol); + TEST_ASSERT(add1 == 0); + decomp.process(); + auto rows = decomp.getRGroupsAsRows(); + auto r1 = rows[0]["R1"]; + // Check to see that Stereo bond is present and defined + bool foundStereo = false; + for (const auto bond: r1->bonds()) { + if (bond->getStereo() > Bond::STEREOANY) { + TEST_ASSERT(!foundStereo); + foundStereo = true; + TEST_ASSERT(bond->getStereoAtoms().size() == 2); + } + } + TEST_ASSERT(foundStereo); + + const auto core2 = R"CTAB( + ChemDraw10242316432D + + 7 7 0 0 0 0 0 0 0 0999 V2000 + -1.0717 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0717 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3572 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3572 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3572 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3572 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0717 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 0 + 2 3 1 0 0 + 3 4 2 0 0 + 4 5 1 0 0 + 5 6 2 0 0 + 6 1 1 0 0 + 5 7 1 0 0 +M END +)CTAB"_ctab; + RGroupDecomposition decomp2(*core2, params); + const auto add2 = decomp2.add(*mol); + TEST_ASSERT(add2 == 0); + decomp2.process(); + rows = decomp2.getRGroupsAsRows(); + r1 = rows[0]["R1"]; + // Check to see that Stereo bond is not present + foundStereo = false; + for (const auto bond: r1->bonds()) { + if (bond->getStereo() > Bond::STEREOANY) { + foundStereo = true; + } + } + TEST_ASSERT(!foundStereo); + + const auto core3 = R"CTAB( + ChemDraw10252316142D + + 7 7 0 0 0 0 0 0 0 0999 V2000 + -0.7145 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7145 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7145 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7145 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 0 + 2 3 1 0 0 + 3 4 1 0 0 + 4 5 2 0 0 + 5 6 1 0 0 + 6 1 1 0 0 + 6 7 2 0 0 +M END +)CTAB"_ctab; + const auto mol3 = "C/C=C1N=CCC=C/1"_smiles; + RGroupDecomposition decomp3(*core3, params); + const auto add3 = decomp3.add(*mol3); + TEST_ASSERT(add3 == 0); + decomp3.process(); + rows = decomp3.getRGroupsAsRows(); + const auto c1 = rows[0]["Core"]; + // Check to see that Stereo bond is not present + foundStereo = false; + for (const auto bond: c1->bonds()) { + if (bond->getStereo() > Bond::STEREOANY) { + TEST_ASSERT(!foundStereo); + foundStereo = true; + TEST_ASSERT(bond->getStereoAtoms().size() == 2); + } + } + TEST_ASSERT(foundStereo); +} + int main() { RDLog::InitLogs(); boost::logging::disable_logs("rdApp.debug"); @@ -4030,6 +4151,7 @@ int main() { testStereoGroupsPreserved(); testTautomerCore(); testEnumeratedCore(); + testStereoBondBug(); BOOST_LOG(rdInfoLog) << "********************************************************\n"; return 0; diff --git a/Code/JavaWrappers/csharp_wrapper/RDKitCSharpTest/RGroupDecompositionTest.cs b/Code/JavaWrappers/csharp_wrapper/RDKitCSharpTest/RGroupDecompositionTest.cs index e47cf47e633..c3cdb9d26fa 100644 --- a/Code/JavaWrappers/csharp_wrapper/RDKitCSharpTest/RGroupDecompositionTest.cs +++ b/Code/JavaWrappers/csharp_wrapper/RDKitCSharpTest/RGroupDecompositionTest.cs @@ -1,9 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using GraphMolWrap; +using GraphMolWrap; using Xunit; namespace RdkitTests @@ -39,9 +34,10 @@ M RGP 2 8 1 9 2 M END"; var core = RWMol.MolFromMolBlock(block); - var rgdParameters = new RGroupDecompositionParameters { - matchingStrategy = (uint) RGroupMatching.GreedyChunks, - scoreMethod = (uint) RGroupScore.FingerprintVariance, + var rgdParameters = new RGroupDecompositionParameters + { + matchingStrategy = (uint)RGroupMatching.GreedyChunks, + scoreMethod = (uint)RGroupScore.FingerprintVariance, onlyMatchAtRGroups = false, removeHydrogensPostMatch = true, removeAllHydrogenRGroups = true, @@ -61,5 +57,64 @@ M RGP 2 8 1 9 2 var rows = rgd.getRGroupsAsRows(); Assert.Equal(2, rows.Count); } + + [Fact] + public void TestBreakOnStereoBond() + { + var block = @"ACS Document 1996 + ChemDraw10242316092D + + 6 6 0 0 0 0 0 0 0 0999 V2000 + -0.7145 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7145 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 + 2 3 1 0 + 3 4 2 0 + 4 5 1 0 + 5 6 2 0 + 6 1 1 0 +M END +"; + var core = RWMol.MolFromMolBlock(block); + var rgdParameters = new RGroupDecompositionParameters + { + matchingStrategy = (uint)RGroupMatching.GreedyChunks, + scoreMethod = (uint)RGroupScore.FingerprintVariance, + onlyMatchAtRGroups = false, + removeHydrogensPostMatch = true, + removeAllHydrogenRGroups = true, + allowMultipleRGroupsOnUnlabelled = true, + doTautomers = false, + doEnumeration = false + }; + + var cores = new ROMol_Vect(); + cores.Add(core); + var rgd = new RGroupDecomposition(cores, rgdParameters); + var mol1 = RWMol.MolFromSmiles("C/C=C/C1=CC=CC=C1"); + Assert.Equal(0, rgd.add(mol1)); + Assert.True(rgd.process()); + var rows = rgd.getRGroupsAsRows(); + var columns = rgd.getRGroupsAsColumns(); + Assert.Equal(1, rows.Count); + var r1 = rows[0]["R1"]; + var foundBond = false; + for (var bondIdx = 0U; bondIdx < r1.getNumBonds(); bondIdx++) + { + var bond = r1.getBondWithIdx(bondIdx); + if (bond.getStereo() > Bond.BondStereo.STEREOANY) + { + Assert.False(foundBond); + foundBond = true; + var stereoAtoms = bond.getStereoAtoms(); + Assert.Equal(2, stereoAtoms?.Count); + } + } + Assert.True(foundBond); + } } -} +} \ No newline at end of file