diff --git a/.azure-pipelines/linux_build_java.yml b/.azure-pipelines/linux_build_java.yml index 94bdf6a5588..2d49cf4404e 100644 --- a/.azure-pipelines/linux_build_java.yml +++ b/.azure-pipelines/linux_build_java.yml @@ -1,7 +1,7 @@ steps: - bash: | sudo apt-get update - sudo apt-get install -y software-properties-common zlib1g zlib1g-dev swig3.0 + sudo apt-get install -y software-properties-common zlib1g zlib1g-dev swig sudo apt-get install -y libboost-all-dev libfreetype-dev libeigen3-dev displayName: Setup build environment - bash: | diff --git a/Code/GraphMol/GeneralizedSubstruct/XQMol.cpp b/Code/GraphMol/GeneralizedSubstruct/XQMol.cpp index 84ecdaf83df..810d9948be1 100644 --- a/Code/GraphMol/GeneralizedSubstruct/XQMol.cpp +++ b/Code/GraphMol/GeneralizedSubstruct/XQMol.cpp @@ -30,6 +30,29 @@ ExtendedQueryMol::ExtendedQueryMol(const std::string &text, bool isJSON) { } } +void ExtendedQueryMol::initFromOther(const ExtendedQueryMol &other) { + if (std::holds_alternative(other.xqmol)) { + xqmol = std::make_unique( + *std::get(other.xqmol)); + } else if (std::holds_alternative( + other.xqmol)) { + xqmol = std::make_unique( + *std::get(other.xqmol)); + } else if (std::holds_alternative( + other.xqmol)) { + xqmol = std::make_unique( + *std::get(other.xqmol)); + } else if (std::holds_alternative( + other.xqmol)) { + auto tb = std::make_unique>>(); + for (const auto &tqp : + *std::get(other.xqmol)) { + tb->emplace_back(std::make_unique(*tqp)); + } + xqmol = std::move(tb); + } +} + std::vector SubstructMatch( const ROMol &mol, const ExtendedQueryMol &query, const SubstructMatchParameters ¶ms) { diff --git a/Code/GraphMol/GeneralizedSubstruct/XQMol.h b/Code/GraphMol/GeneralizedSubstruct/XQMol.h index e6e27e52b63..79978f8564f 100644 --- a/Code/GraphMol/GeneralizedSubstruct/XQMol.h +++ b/Code/GraphMol/GeneralizedSubstruct/XQMol.h @@ -28,8 +28,7 @@ namespace RDKit { namespace GeneralizedSubstruct { -struct RDKIT_GENERALIZEDSUBSTRUCT_EXPORT ExtendedQueryMol - : private boost::noncopyable { +struct RDKIT_GENERALIZEDSUBSTRUCT_EXPORT ExtendedQueryMol { enum ExtendedQueryMolTypes : unsigned char { XQM_MOL = 1, XQM_MOLBUNDLE = 2, @@ -50,12 +49,21 @@ struct RDKIT_GENERALIZEDSUBSTRUCT_EXPORT ExtendedQueryMol ExtendedQueryMol( std::unique_ptr>> tqs) : xqmol(std::move(tqs)) {} + ExtendedQueryMol(const ExtendedQueryMol &other) { initFromOther(other); } + ExtendedQueryMol &operator=(const ExtendedQueryMol &other) { + if (this == &other) { + return *this; + } + initFromOther(other); + return *this; + } ExtendedQueryMol(ExtendedQueryMol &&o) noexcept : xqmol(std::move(o.xqmol)) {} ExtendedQueryMol(const std::string &text, bool isJSON = false); void initFromBinary(const std::string &pkl); void initFromJSON(const std::string &text); + void initFromOther(const ExtendedQueryMol &other); ContainedType xqmol; std::string toBinary() const; diff --git a/Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp b/Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp index a3c08532223..b193940c66c 100644 --- a/Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp +++ b/Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp @@ -123,45 +123,70 @@ TEST_CASE("tautomer bundle basics") { } } -TEST_CASE("createExtendedQueryMol") { +TEST_CASE("createExtendedQueryMol and copy ctors") { SECTION("RWMol") { auto mol = "COCC"_smiles; REQUIRE(mol); - auto xqm = createExtendedQueryMol(*mol); - CHECK(std::holds_alternative(xqm.xqmol)); - CHECK(SubstructMatch(*"COCC"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"COOCC"_smiles, xqm).empty()); + auto txqm = createExtendedQueryMol(*mol); + ExtendedQueryMol xqm1(txqm); + ExtendedQueryMol xqm2(std::make_unique(*mol)); + xqm2 = txqm; + + for (const auto &xqm : {txqm, xqm1, xqm2}) { + CHECK(std::holds_alternative(xqm.xqmol)); + CHECK(SubstructMatch(*"COCC"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"COOCC"_smiles, xqm).empty()); + } } SECTION("MolBundle") { auto mol = "COCC |LN:1:1.3|"_smiles; REQUIRE(mol); - auto xqm = createExtendedQueryMol(*mol); - CHECK(std::holds_alternative(xqm.xqmol)); - CHECK(SubstructMatch(*"COCC"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"COOCC"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"COOOCC"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"COOOOCC"_smiles, xqm).empty()); + auto txqm = createExtendedQueryMol(*mol); + ExtendedQueryMol xqm1(txqm); + ExtendedQueryMol xqm2(std::make_unique(*mol)); + xqm2 = txqm; + + for (const auto &xqm : {txqm, xqm1, xqm2}) { + CHECK(std::holds_alternative(xqm.xqmol)); + CHECK(SubstructMatch(*"COCC"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"COOCC"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"COOOCC"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"COOOOCC"_smiles, xqm).empty()); + } } SECTION("TautomerQuery") { auto mol1 = "CC1OC(N)=N1"_smiles; REQUIRE(mol1); - auto xqm = createExtendedQueryMol(*mol1); - CHECK(std::holds_alternative(xqm.xqmol)); - CHECK(SubstructMatch(*"CCC1OC(N)=N1"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"CCC1OC(=N)N1"_smiles, *mol1).empty()); - CHECK(SubstructMatch(*"CCC1OC(=N)N1"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"c1[nH]ncc1"_smiles, xqm).empty()); + auto txqm = createExtendedQueryMol(*mol1); + ExtendedQueryMol xqm1(txqm); + ExtendedQueryMol xqm2(std::make_unique(*mol1)); + xqm2 = txqm; + + for (const auto &xqm : {txqm, xqm1, xqm2}) { + CHECK( + std::holds_alternative(xqm.xqmol)); + CHECK(SubstructMatch(*"CCC1OC(N)=N1"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"CCC1OC(=N)N1"_smiles, *mol1).empty()); + CHECK(SubstructMatch(*"CCC1OC(=N)N1"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"c1[nH]ncc1"_smiles, xqm).empty()); + } } SECTION("TautomerBundle") { auto mol1 = "COCC1OC(N)=N1 |LN:1:1.3|"_smiles; REQUIRE(mol1); - auto xqm = createExtendedQueryMol(*mol1); - CHECK( - std::holds_alternative(xqm.xqmol)); - CHECK(SubstructMatch(*"COCC1(F)OC(N)=N1"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"COCC1OC(N)=N1"_smiles, xqm).size() == 1); - CHECK(SubstructMatch(*"COOOOCC1OC(=N)N1"_smiles, xqm).empty()); + auto txqm = createExtendedQueryMol(*mol1); + ExtendedQueryMol xqm1(txqm); + ExtendedQueryMol xqm2(std::make_unique(*mol1)); + xqm2 = txqm; + + for (const auto &xqm : {txqm, xqm1, xqm2}) { + CHECK(std::holds_alternative( + xqm.xqmol)); + CHECK(SubstructMatch(*"COCC1(F)OC(N)=N1"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"COCC1OC(N)=N1"_smiles, xqm).size() == 1); + CHECK(SubstructMatch(*"COOOOCC1OC(=N)N1"_smiles, xqm).empty()); + } } } diff --git a/Code/GraphMol/SubstructLibrary/CMakeLists.txt b/Code/GraphMol/SubstructLibrary/CMakeLists.txt index 2b38ddebaa2..05fe208bfa2 100644 --- a/Code/GraphMol/SubstructLibrary/CMakeLists.txt +++ b/Code/GraphMol/SubstructLibrary/CMakeLists.txt @@ -8,7 +8,7 @@ endif() rdkit_library(SubstructLibrary SubstructLibrary.cpp PatternFactory.cpp - LINK_LIBRARIES TautomerQuery MolStandardize Fingerprints SubstructMatch SmilesParse + LINK_LIBRARIES GeneralizedSubstruct TautomerQuery MolStandardize Fingerprints SubstructMatch SmilesParse GraphMol Catalogs DataStructs RDGeneral ${RDKit_SERIALIZATION_LIBS}) target_compile_definitions(SubstructLibrary PRIVATE RDKIT_SUBSTRUCTLIBRARY_BUILD) diff --git a/Code/GraphMol/SubstructLibrary/SubstructLibrary.cpp b/Code/GraphMol/SubstructLibrary/SubstructLibrary.cpp index 33098fd7eb9..6434a667049 100644 --- a/Code/GraphMol/SubstructLibrary/SubstructLibrary.cpp +++ b/Code/GraphMol/SubstructLibrary/SubstructLibrary.cpp @@ -38,10 +38,13 @@ #endif #include +#include #include namespace RDKit { +using namespace GeneralizedSubstruct; + bool SubstructLibraryCanSerialize() { #ifdef RDK_USE_BOOST_SERIALIZATION return true; @@ -55,9 +58,9 @@ struct Bits { const FPHolderBase *fps; SubstructMatchParameters params; - Bits(const FPHolderBase *fps, const ROMol &m, + Bits(const FPHolderBase *fingerprints, const ROMol &m, const SubstructMatchParameters &ssparams) - : fps(fps), params(ssparams) { + : fps(fingerprints), params(ssparams) { if (fps) { queryBits = fps->makeFingerprint(m); } else { @@ -86,6 +89,64 @@ struct Bits { } } + // FIX complete this + Bits(const FPHolderBase *fingerprints, const ExtendedQueryMol &xqm, + const SubstructMatchParameters &ssparams) + : fps(fingerprints), params(ssparams) { + if (fps) { + const auto *tph = dynamic_cast(fps); + const auto *ph = dynamic_cast(fps); + if (std::holds_alternative(xqm.xqmol)) { + queryBits = fps->makeFingerprint( + *std::get(xqm.xqmol)); + } else if (std::holds_alternative( + xqm.xqmol)) { + auto &bndl = std::get(xqm.xqmol); + auto tqb = new ExplicitBitVect(ph->getNumBits()); + queryBits = tqb; + for (auto mol : bndl->getMols()) { + auto tfp = fps->makeFingerprint(*mol); + *tqb &= *tfp; + delete tfp; + } + } else if (std::holds_alternative( + xqm.xqmol)) { + auto &tq = std::get(xqm.xqmol); + if (!tph) { + BOOST_LOG(rdWarningLog) << "Pattern fingerprints for tautomersearch " + "aren't tautomer fingerprints, ignoring..." + << std::endl; + queryBits = nullptr; + fps = nullptr; + } else { + queryBits = tq->patternFingerprintTemplate(tph->getNumBits()); + } + } else if (std::holds_alternative( + xqm.xqmol)) { + if (!tph) { + BOOST_LOG(rdWarningLog) << "Pattern fingerprints for tautomersearch " + "aren't tautomer fingerprints, ignoring..." + << std::endl; + queryBits = nullptr; + fps = nullptr; + } else { + auto &bndl = std::get(xqm.xqmol); + auto tqb = new ExplicitBitVect(ph->getNumBits()); + queryBits = tqb; + for (auto &tq : *bndl) { + auto tfp = tq->patternFingerprintTemplate(tph->getNumBits()); + *tqb &= *tfp; + delete tfp; + } + } + } else { + queryBits = nullptr; + } + } else { + queryBits = nullptr; + } + } + bool check(unsigned int idx) const { if (fps) { return fps->passesFilter(idx, *queryBits); @@ -139,6 +200,36 @@ bool query_needs_rings(const TautomerQuery &in_query) { return query_needs_rings(in_query.getTemplateMolecule()); } +bool query_needs_rings(const ExtendedQueryMol &xqm) { + if (std::holds_alternative(xqm.xqmol)) { + return query_needs_rings(*std::get(xqm.xqmol)); + } else if (std::holds_alternative( + xqm.xqmol)) { + return query_needs_rings( + std::get(xqm.xqmol) + ->getTemplateMolecule()); + } else if (std::holds_alternative(xqm.xqmol)) { + for (const auto &mol : + std::get(xqm.xqmol)->getMols()) { + if (query_needs_rings(*mol)) { + return true; + } + } + return false; + } else if (std::holds_alternative( + xqm.xqmol)) { + for (const auto &tq : + *std::get(xqm.xqmol)) { + if (query_needs_rings(tq->getTemplateMolecule())) { + return true; + } + } + return false; + } + return true; // if we somehow get here, we better assume that rings are + // necessary +} + template void SubSearcher(const Query &in_query, const Bits &bits, const MolHolderBase &mols, unsigned int start, @@ -149,6 +240,8 @@ void SubSearcher(const Query &in_query, const Bits &bits, std::vector *idxs) { PRECONDITION(searchOrder.empty() || searchOrder.size() >= end, "bad searchOrder data"); + // we copy the query so that we don't end up with lock contention for + // recursive matchers when using multiple threads Query query(in_query); for (unsigned int idx = start; idx < end; idx += numThreads) { unsigned int sidx = idx; @@ -176,8 +269,8 @@ void SubSearcher(const Query &in_query, const Bits &bits, if (idxs) { idxs->push_back(sidx); if (maxResults > 0 && counter == maxResults) { - // if we reached maxResults, record the last idx we processed and bail - // out + // if we reached maxResults, record the last idx we processed and + // bail out end = idx; break; } @@ -258,18 +351,18 @@ int internalGetMatches(const Query &query, MolHolderBase &mols, // If maxResults was close to the theoretical maximum, some threads // might have even run out of molecules to screen without reaching // maxResults so we need to make sure that all threads have screened as - // many molecules as the most productive thread if we want multi-threaded - // runs to yield the same results independently from the number of - // threads. + // many molecules as the most productive thread if we want + // multi-threaded runs to yield the same results independently from the + // number of threads. thread_group_idx = 0; for (auto &fut : thread_group) { fut.get(); counter += counterVect[thread_group_idx++]; } thread_group.clear(); - // Find out out the max number of molecules that was screened by the most - // productive thread and do the same in all other threads, unless the - // max number of molecules was reached + // Find out out the max number of molecules that was screened by the + // most productive thread and do the same in all other threads, unless + // the max number of molecules was reached maxEndIdx = *std::max_element(endIdxVect.begin(), endIdxVect.end()); for (thread_group_idx = 0; thread_group_idx < numThreads; ++thread_group_idx) { @@ -395,6 +488,17 @@ std::vector SubstructLibrary::getMatches( return idxs; } +std::vector SubstructLibrary::getMatches( + const ExtendedQueryMol &query, unsigned int startIdx, unsigned int endIdx, + const SubstructMatchParameters ¶ms, int numThreads, + int maxResults) const { + std::vector idxs; + boost::dynamic_bitset<> found(mols->size()); + internalGetMatches(query, *mols, fps, startIdx, endIdx, params, numThreads, + maxResults, found, searchOrder, &idxs); + return idxs; +} + unsigned int SubstructLibrary::countMatches( const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, int numThreads) const { @@ -417,6 +521,14 @@ unsigned int SubstructLibrary::countMatches( numThreads, -1, searchOrder, nullptr); } +unsigned int SubstructLibrary::countMatches( + const ExtendedQueryMol &query, unsigned int startIdx, unsigned int endIdx, + const SubstructMatchParameters ¶ms, int numThreads) const { + boost::dynamic_bitset<> found(mols->size()); + return internalGetMatches(query, *mols, fps, startIdx, endIdx, params, + numThreads, -1, found, searchOrder, nullptr); +} + bool SubstructLibrary::hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, @@ -442,6 +554,14 @@ bool SubstructLibrary::hasMatch(const MolBundle &query, unsigned int startIdx, return getMatches(query, startIdx, endIdx, params, numThreads, maxResults) .size() > 0; } +bool SubstructLibrary::hasMatch(const ExtendedQueryMol &query, + unsigned int startIdx, unsigned int endIdx, + const SubstructMatchParameters ¶ms, + int numThreads) const { + const int maxResults = 1; + return getMatches(query, startIdx, endIdx, params, numThreads, maxResults) + .size() > 0; +} void SubstructLibrary::toStream(std::ostream &ss) const { #ifndef RDK_USE_BOOST_SERIALIZATION diff --git a/Code/GraphMol/SubstructLibrary/SubstructLibrary.h b/Code/GraphMol/SubstructLibrary/SubstructLibrary.h index db95cd9611d..5eaf2c8db29 100644 --- a/Code/GraphMol/SubstructLibrary/SubstructLibrary.h +++ b/Code/GraphMol/SubstructLibrary/SubstructLibrary.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,8 @@ namespace RDKit { +using GeneralizedSubstruct::ExtendedQueryMol; + RDKIT_SUBSTRUCTLIBRARY_EXPORT bool SubstructLibraryCanSerialize(); //! Base class API for holding molecules to substructure search. @@ -743,6 +746,13 @@ class RDKIT_SUBSTRUCTLIBRARY_EXPORT SubstructLibrary { const SubstructMatchParameters ¶ms, int numThreads = -1, int maxResults = -1) const; + //! overload + std::vector getMatches(const ExtendedQueryMol &query, + unsigned int startIdx, + unsigned int endIdx, + const SubstructMatchParameters ¶ms, + int numThreads = -1, + int maxResults = -1) const; //! Return the number of matches for the query /*! @@ -819,6 +829,11 @@ class RDKIT_SUBSTRUCTLIBRARY_EXPORT SubstructLibrary { unsigned int endIdx, const SubstructMatchParameters ¶ms, int numThreads = -1) const; + //! overload + unsigned int countMatches(const ExtendedQueryMol &query, + unsigned int startIdx, unsigned int endIdx, + const SubstructMatchParameters ¶ms, + int numThreads = -1) const; //! Returns true if any match exists for the query /*! @@ -883,6 +898,10 @@ class RDKIT_SUBSTRUCTLIBRARY_EXPORT SubstructLibrary { bool hasMatch(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, int numThreads = -1) const; + //! overload + bool hasMatch(const ExtendedQueryMol &query, unsigned int startIdx, + unsigned int endIdx, const SubstructMatchParameters ¶ms, + int numThreads = -1) const; //! Returns the molecule at the given index /*! \param idx Index of the molecule in the library (n.b. could contain diff --git a/Code/GraphMol/SubstructLibrary/Wrap/SubstructLibraryWrap.cpp b/Code/GraphMol/SubstructLibrary/Wrap/SubstructLibraryWrap.cpp index b392b18d62b..552b6864101 100644 --- a/Code/GraphMol/SubstructLibrary/Wrap/SubstructLibraryWrap.cpp +++ b/Code/GraphMol/SubstructLibrary/Wrap/SubstructLibraryWrap.cpp @@ -37,12 +37,15 @@ #include #include +#include namespace python = boost::python; using boost_adaptbx::python::streambuf; namespace RDKit { +using GeneralizedSubstruct::ExtendedQueryMol; + // Because we need to release the GIL before we launch a thread, we need to make // a thin stub // for every function that does this. This stub exists Because I couldn't @@ -110,29 +113,8 @@ class SubstructLibraryWrap { maxResults); }; - std::vector getMatches(const ROMol &query, - unsigned int startIdx, - unsigned int endIdx, - const SubstructMatchParameters ¶ms, - int numThreads = -1, - int maxResults = -1) const { - NOGIL h; - return ss.getMatches(query, startIdx, endIdx, params, numThreads, - maxResults); - } - - std::vector getMatches(const MolBundle &query, - unsigned int startIdx, - unsigned int endIdx, - const SubstructMatchParameters ¶ms, - int numThreads = -1, - int maxResults = -1) const { - NOGIL h; - return ss.getMatches(query, startIdx, endIdx, params, numThreads, - maxResults); - } - //! overload - std::vector getMatches(const TautomerQuery &query, + template + std::vector getMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, @@ -172,23 +154,8 @@ class SubstructLibraryWrap { useChirality, useQueryQueryMatches, numThreads); }; - unsigned int countMatches(const ROMol &query, unsigned int startIdx, - unsigned int endIdx, - const SubstructMatchParameters ¶ms, - int numThreads = -1) const { - NOGIL h; - return ss.countMatches(query, startIdx, endIdx, params, numThreads); - } - - unsigned int countMatches(const TautomerQuery &query, unsigned int startIdx, - unsigned int endIdx, - const SubstructMatchParameters ¶ms, - int numThreads = -1) const { - NOGIL h; - return ss.countMatches(query, startIdx, endIdx, params, numThreads); - } - - unsigned int countMatches(const MolBundle &query, unsigned int startIdx, + template + unsigned int countMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, int numThreads = -1) const { @@ -220,27 +187,14 @@ class SubstructLibraryWrap { useQueryQueryMatches, numThreads); }; - bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx, + template + bool hasMatch(const Query &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, int numThreads = -1) const { NOGIL h; return ss.hasMatch(query, startIdx, endIdx, params, numThreads); } - bool hasMatch(const TautomerQuery &query, unsigned int startIdx, - unsigned int endIdx, const SubstructMatchParameters ¶ms, - int numThreads = -1) const { - NOGIL h; - return ss.hasMatch(query, startIdx, endIdx, params, numThreads); - } - - bool hasMatch(const MolBundle &query, unsigned int startIdx, - unsigned int endIdx, const SubstructMatchParameters ¶ms, - int numThreads = -1) const { - NOGIL h; - return ss.hasMatch(query, startIdx, endIdx, params, numThreads); - } - boost::shared_ptr getMol(unsigned int idx) const { return ss.getMol(idx); } @@ -794,6 +748,7 @@ struct substructlibrary_wrapper { LARGE_DEF(ROMol) LARGE_DEF(TautomerQuery) LARGE_DEF(MolBundle) + LARGE_DEF(ExtendedQueryMol) // clang-format on .def("GetMol", &SubstructLibraryWrap::getMol, diff --git a/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py b/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py index 3bfd03b5fb2..873189b7dea 100644 --- a/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py +++ b/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py @@ -49,7 +49,7 @@ import time from rdkit import Chem -from rdkit.Chem import rdSubstructLibrary +from rdkit.Chem import rdSubstructLibrary, rdGeneralizedSubstruct, rdTautomerQuery def load_tests(loader, tests, ignore): @@ -685,6 +685,53 @@ def test_bad_smiles(self): pylog.setLevel(logging.WARN) rdBase.LogToCppStreams() + def test_using_xqms(self): + smis = ["COCC=O", "COOCC=O", "COOOCC=O", "COOOOCC=O"] + + for holder in [ + rdSubstructLibrary.CachedSmilesMolHolder(), + rdSubstructLibrary.CachedTrustedSmilesMolHolder() + ]: + for smi in smis: + holder.AddSmiles(smi) + fph = rdSubstructLibrary.TautomerPatternHolder() + lib = rdSubstructLibrary.SubstructLibrary(holder) + + mol = Chem.MolFromSmiles("COCC") + xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(mol) + res = lib.GetMatches(xqm) + self.assertEqual(list(res), [0]) + self.assertTrue(lib.HasMatch(xqm)) + self.assertEqual(lib.CountMatches(xqm), 1) + + mol = Chem.MolFromSmiles("COC=CO") + xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(mol) + res = lib.GetMatches(xqm) + self.assertEqual(list(res), [0]) + self.assertTrue(lib.HasMatch(xqm)) + self.assertEqual(lib.CountMatches(xqm), 1) + + mol = Chem.MolFromSmiles("COCC |LN:1:1.3|") + xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(mol) + res = lib.GetMatches(xqm) + self.assertEqual(list(res), [0, 1, 2]) + self.assertTrue(lib.HasMatch(xqm)) + self.assertEqual(lib.CountMatches(xqm), 3) + + mol = Chem.MolFromSmiles("COC=CO |LN:1:1.3|") + xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(mol) + res = lib.GetMatches(xqm) + self.assertEqual(list(res), [0, 1, 2]) + self.assertTrue(lib.HasMatch(xqm)) + self.assertEqual(lib.CountMatches(xqm), 3) + + mol = Chem.MolFromSmiles("CNC=CO |LN:1:1.3|") + xqm = rdGeneralizedSubstruct.CreateExtendedQueryMol(mol) + res = lib.GetMatches(xqm) + self.assertEqual(list(res), []) + self.assertFalse(lib.HasMatch(xqm)) + self.assertEqual(lib.CountMatches(xqm), 0) + if __name__ == '__main__': unittest.main() diff --git a/Code/GraphMol/SubstructLibrary/catch_tests.cpp b/Code/GraphMol/SubstructLibrary/catch_tests.cpp index 856e6fb1e8c..0c3907a93bd 100644 --- a/Code/GraphMol/SubstructLibrary/catch_tests.cpp +++ b/Code/GraphMol/SubstructLibrary/catch_tests.cpp @@ -17,6 +17,7 @@ #include #include #include +#include using namespace RDKit; @@ -232,4 +233,95 @@ TEST_CASE("searchOrderFunctionDemo") { CHECK(libMatches.size() == 5); CHECK(libMatches == std::vector{3, 2, 0, 1, 4}); } -} \ No newline at end of file +} + +TEST_CASE("ExtendedQueryMol") { + std::vector libSmiles = {"COCC=O", "COOCC=O", "COOOCC=O", + "COOOOCC=O"}; + boost::shared_ptr mholder(new MolHolder()); + boost::shared_ptr fpholder(new TautomerPatternHolder()); + + SubstructLibrary ssslib(mholder, fpholder); + + for (const auto &smi : libSmiles) { + std::unique_ptr mol(SmilesToMol(smi)); + REQUIRE(mol); + ssslib.addMol(*mol); + } + SECTION("mol") { + auto mol = "COCC"_smiles; + auto xqm = GeneralizedSubstruct::createExtendedQueryMol(*mol); + + auto libMatches = ssslib.getMatches(xqm); + CHECK(libMatches.size() == 1); + CHECK(libMatches == std::vector{0}); + CHECK(ssslib.countMatches(xqm) == 1); + CHECK(ssslib.hasMatch(xqm)); + } + SECTION("tautomer query") { + auto mol = "COCC"_smiles; + auto xqm = GeneralizedSubstruct::ExtendedQueryMol( + std::unique_ptr(TautomerQuery::fromMol(*mol))); + auto libMatches = ssslib.getMatches(xqm); + CHECK(libMatches.size() == 1); + CHECK(libMatches == std::vector{0}); + CHECK(ssslib.countMatches(xqm) == 1); + CHECK(ssslib.hasMatch(xqm)); + } + SECTION("mol bundle") { + auto mol = "COCC |LN:1:1.3|"_smiles; + auto xqm = GeneralizedSubstruct::createExtendedQueryMol(*mol); + + auto libMatches = ssslib.getMatches(xqm); + CHECK(libMatches.size() == 3); + CHECK(libMatches == std::vector{0, 1, 2}); + CHECK(ssslib.countMatches(xqm) == 3); + CHECK(ssslib.hasMatch(xqm)); + } + SECTION("tautomer bundle") { + auto mol = "COC=CO |LN:1:1.3|"_smiles; + auto xqm = GeneralizedSubstruct::createExtendedQueryMol(*mol); + + auto libMatches = ssslib.getMatches(xqm); + CHECK(libMatches.size() == 3); + CHECK(libMatches == std::vector{0, 1, 2}); + CHECK(ssslib.countMatches(xqm) == 3); + CHECK(ssslib.hasMatch(xqm)); + } + SECTION("mol no match") { + auto mol = "CNCC"_smiles; + auto xqm = GeneralizedSubstruct::createExtendedQueryMol(*mol); + + auto libMatches = ssslib.getMatches(xqm); + CHECK(libMatches.empty()); + CHECK(ssslib.countMatches(xqm) == 0); + CHECK(!ssslib.hasMatch(xqm)); + } + SECTION("tautomer query no match") { + auto mol = "CNCC"_smiles; + auto xqm = GeneralizedSubstruct::ExtendedQueryMol( + std::unique_ptr(TautomerQuery::fromMol(*mol))); + auto libMatches = ssslib.getMatches(xqm); + CHECK(libMatches.empty()); + CHECK(ssslib.countMatches(xqm) == 0); + CHECK(!ssslib.hasMatch(xqm)); + } + SECTION("mol bundle no match") { + auto mol = "CNCC |LN:1:1.3|"_smiles; + auto xqm = GeneralizedSubstruct::createExtendedQueryMol(*mol); + + auto libMatches = ssslib.getMatches(xqm); + CHECK(libMatches.empty()); + CHECK(ssslib.countMatches(xqm) == 0); + CHECK(!ssslib.hasMatch(xqm)); + } + SECTION("tautomer bundle no match") { + auto mol = "CNC=CO |LN:1:1.3|"_smiles; + auto xqm = GeneralizedSubstruct::createExtendedQueryMol(*mol); + + auto libMatches = ssslib.getMatches(xqm); + CHECK(libMatches.empty()); + CHECK(ssslib.countMatches(xqm) == 0); + CHECK(!ssslib.hasMatch(xqm)); + } +} diff --git a/Code/JavaWrappers/CMakeLists.txt b/Code/JavaWrappers/CMakeLists.txt index 1bb96e47b1d..f7e330af3ac 100644 --- a/Code/JavaWrappers/CMakeLists.txt +++ b/Code/JavaWrappers/CMakeLists.txt @@ -20,7 +20,7 @@ if(RDK_BUILD_INCHI_SUPPORT) set(swigRDKitLibList "${swigRDKitLibList}RDInchiLib;${INCHI_LIBRARIES};") endif(RDK_BUILD_INCHI_SUPPORT) set(swigRDKitLibList "${swigRDKitLibList}" - "Abbreviations;ScaffoldNetwork;MolHash;RGroupDecomposition;SubstructLibrary;TautomerQuery;" + "GeneralizedSubstruct;Abbreviations;ScaffoldNetwork;MolHash;RGroupDecomposition;SubstructLibrary;TautomerQuery;" "MolEnumerator;" "MolStandardize;FilterCatalog;Catalogs;FMCS;MolDraw2D;FileParsers;SmilesParse;MarvinParser;" "Depictor;SubstructMatch;ChemReactions;Fingerprints;ChemTransforms;" diff --git a/Code/JavaWrappers/GeneralizedSubstruct.i b/Code/JavaWrappers/GeneralizedSubstruct.i new file mode 100644 index 00000000000..89d5b13e394 --- /dev/null +++ b/Code/JavaWrappers/GeneralizedSubstruct.i @@ -0,0 +1,16 @@ + +%{ +#include +%} +// %include "std_unique_ptr.i" +// %unique_ptr(ExtendedQueryMol) + +%ignore ExtendedQueryMol(std::unique_ptr mol); +%ignore ExtendedQueryMol(std::unique_ptr mol); +%ignore ExtendedQueryMol(std::unique_ptr mol); +%ignore ExtendedQueryMol( + std::unique_ptr>> tqs); +%ignore xqmol; + +%include "GraphMol/GeneralizedSubstruct/XQMol.h"; + diff --git a/Code/JavaWrappers/SubstructLibrary.i b/Code/JavaWrappers/SubstructLibrary.i index 3f9cfda237f..02e4b60c087 100644 --- a/Code/JavaWrappers/SubstructLibrary.i +++ b/Code/JavaWrappers/SubstructLibrary.i @@ -36,6 +36,8 @@ %{ #include #include +#include +using RDKit::GeneralizedSubstruct::ExtendedQueryMol; %} %shared_ptr(RDKit::TautomerQuery) %shared_ptr(RDKit::MolHolderBase) @@ -102,10 +104,13 @@ %extend RDKit::SubstructLibrary { %template(getMatches) getMatches; %template(getMatches) getMatches; + %template(getMatches) getMatches; %template(countMatches) countMatches; %template(countMatches) countMatches; + %template(countMatches) countMatches; %template(hasMatch) hasMatch; %template(hasMatch) hasMatch; + %template(hasMatch) hasMatch; } diff --git a/Code/JavaWrappers/gmwrapper/GraphMolJava.i b/Code/JavaWrappers/gmwrapper/GraphMolJava.i index 89cf08d18eb..d6b39242049 100644 --- a/Code/JavaWrappers/gmwrapper/GraphMolJava.i +++ b/Code/JavaWrappers/gmwrapper/GraphMolJava.i @@ -223,6 +223,7 @@ typedef unsigned long long int uintmax_t; %include "../MolHash.i" %include "../Abbreviations.i" %include "../Streams.i" +%include "../GeneralizedSubstruct.i" // Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java #ifdef INCLUDE_ERROR_GENERATOR diff --git a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/SubstructLibraryTests.java b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/SubstructLibraryTests.java index 0d07fcd4ef8..3023606c18d 100644 --- a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/SubstructLibraryTests.java +++ b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/SubstructLibraryTests.java @@ -353,6 +353,34 @@ public void test5Basics() { assertEquals(ids.get(0), "foo"); assertEquals(ids2.get(0), "foo"); } + + + + @Test + public void test6XQM() { + MolHolder mh = new MolHolder(); + PatternHolder pat = new TautomerPatternHolder(); + assertEquals(0, mh.size()); + // mol holder + SubstructLibrary lib = new SubstructLibrary(mh, pat); + + mol = RWMol.MolFromSmiles("COCC=O"); + lib.addMol(mol); + mol = RWMol.MolFromSmiles("COOCC=O"); + lib.addMol(mol); + mol = RWMol.MolFromSmiles("COOOCC=O"); + lib.addMol(mol); + mol = RWMol.MolFromSmiles("COOOOCC=O"); + lib.addMol(mol); + + + RWMol rwm = RWMol.MolFromSmiles("COC=CO |LN:1:1.3|"); + ExtendedQueryMol qry = RDKFuncs.createExtendedQueryMol(rwm); + + UInt_Vect matches = lib.getMatches(qry); + assertEquals(3, matches.size()); + } + public static void main(String args[]) { org.junit.runner.JUnitCore.main("org.RDKit.SubstructLibraryTests");