From 2469021f95790a9b207c6d266fa1931c04fcec4a Mon Sep 17 00:00:00 2001 From: ishitani Date: Sun, 15 Sep 2024 11:06:09 +0900 Subject: [PATCH] sdf mol writer impl --- pymod/pyproject.toml | 15 + src/modules/importers/SDFMolReader.cpp | 409 +++++++++++------------ src/modules/importers/SDFMolWriter.cpp | 24 +- src/modules/molstr/MolAtom.cpp | 4 +- src/modules/molstr/MolAtom.hpp | 13 +- src/modules/molstr/MolAtom.qif | 2 + src/modules/molstr/MolCoord.cpp | 16 +- src/modules/molstr/MolCoord.hpp | 2 + src/modules/molstr/MolCoord.qif | 2 + src/modules/molstr/MolResidue.hpp | 2 +- tests/importers_tests/test_sdf_reader.py | 6 +- tests/test_data/test1.sdf | 217 +++++------- tests/test_data/test_chg1.sdf | 128 +++++++ 13 files changed, 471 insertions(+), 369 deletions(-) create mode 100644 tests/test_data/test_chg1.sdf diff --git a/pymod/pyproject.toml b/pymod/pyproject.toml index f167a801..7359dd4e 100644 --- a/pymod/pyproject.toml +++ b/pymod/pyproject.toml @@ -19,3 +19,18 @@ wheel.packages = ["python/cuemol"] # [tool.scikit-build.wheel.packages] # "example/mypackage" = "python/mypackage" + +[tool.black] +line-length = 88 +target-version = ["py38"] + +[tool.isort] +default_section = "THIRDPARTY" +ensure_newline_before_comments = true +force_grid_wrap = 0 +force_single_line = false +include_trailing_comma = true +known_first_party = ["pfdock", "pfdock_internal"] +line_length = 88 +multi_line_output = 3 +use_parentheses = true diff --git a/src/modules/importers/SDFMolReader.cpp b/src/modules/importers/SDFMolReader.cpp index 508c0047..3515919e 100644 --- a/src/modules/importers/SDFMolReader.cpp +++ b/src/modules/importers/SDFMolReader.cpp @@ -7,54 +7,53 @@ #include "SDFMolReader.hpp" -#include - +#include #include #include -#include #include #include +#include using namespace molstr; using namespace importers; SDFMolReader::SDFMolReader() { - m_nReadBonds = 0; - m_nReadAtoms = 0; - m_nReadCmpds = 0; - - m_iLoadCmpd = -1; - m_bLoadAsChain = false; - m_chainName = "A"; - m_nResInd = 1; + m_nReadBonds = 0; + m_nReadAtoms = 0; + m_nReadCmpds = 0; + + m_iLoadCmpd = -1; + m_bLoadAsChain = false; + m_chainName = "A"; + m_nResInd = 1; } SDFMolReader::~SDFMolReader() { - MB_DPRINTLN("SDFMolReader destructed (%p)", this); + MB_DPRINTLN("SDFMolReader destructed (%p)", this); } ///////////// const char *SDFMolReader::getName() const { - return "sdf"; + return "sdf"; } const char *SDFMolReader::getTypeDescr() const { - return "MOL/SDF Coordinates (*.mol;*.sdf)"; + return "MOL/SDF Coordinates (*.mol;*.sdf)"; } const char *SDFMolReader::getFileExt() const { - return "*.mol; *.sdf"; + return "*.mol; *.sdf"; } qsys::ObjectPtr SDFMolReader::createDefaultObj() const { - return qsys::ObjectPtr(MB_NEW MolCoord()); + return qsys::ObjectPtr(MB_NEW MolCoord()); } ///////// @@ -62,208 +61,206 @@ qsys::ObjectPtr SDFMolReader::createDefaultObj() const /// read SDF from stream bool SDFMolReader::read(qlib::InStream &ins) { - m_pMol = MolCoordPtr(getTarget()); - - m_nReadAtoms = 0; - m_nReadBonds = 0; - m_nReadCmpds = 0; - - qlib::LineStream lin(ins); - LString str; - - int cmpd_id; - for (cmpd_id=0;; cmpd_id++) { - if (m_bLoadAsChain) { - m_sCurrChName = MolCoord::encodeModelInChain(m_chainName, cmpd_id); - m_nCurrResid = m_nResInd; - } - else { - m_sCurrChName = m_chainName; - m_nCurrResid = cmpd_id + m_nResInd; - } - - bool bskip = false; - if (m_iLoadCmpd>= 0 && cmpd_id != m_iLoadCmpd) - bskip = true; - - readMol(lin, bskip); - if (!bskip) - m_nReadCmpds ++; - - for (;;) { - str = lin.readLine(); - if (str.trim().isEmpty() && !lin.ready()) { - LOG_DPRINTLN("SDFMolReader> read %d cmpds/%d atoms/%d bonds", - m_nReadCmpds, m_nReadAtoms, m_nReadBonds); - return true; - } - - if (str.startsWith("$$$$")) - break; + m_pMol = MolCoordPtr(getTarget()); + + m_nReadAtoms = 0; + m_nReadBonds = 0; + m_nReadCmpds = 0; + + qlib::LineStream lin(ins); + LString str; + + int cmpd_id; + for (cmpd_id = 0;; cmpd_id++) { + if (m_bLoadAsChain) { + m_sCurrChName = MolCoord::encodeModelInChain(m_chainName, cmpd_id); + m_nCurrResid = m_nResInd; + } else { + m_sCurrChName = m_chainName; + m_nCurrResid = cmpd_id + m_nResInd; + } + + bool bskip = false; + if (m_iLoadCmpd >= 0 && cmpd_id != m_iLoadCmpd) bskip = true; + + readMol(lin, bskip); + if (!bskip) m_nReadCmpds++; + + for (;;) { + str = lin.readLine(); + if (str.trim().isEmpty() && !lin.ready()) { + LOG_DPRINTLN("SDFMolReader> read %d cmpds/%d atoms/%d bonds", + m_nReadCmpds, m_nReadAtoms, m_nReadBonds); + return true; + } + + if (str.startsWith("$$$$")) break; + } } - } - - // NOT REACHED - return true; + + // NOT REACHED + return true; } /// read one MOL entry from stream void SDFMolReader::readMol(qlib::LineStream &lin, bool bskip) { - LString cmpd_name = lin.readLine(); - cmpd_name = cmpd_name.trim(" \t\r\n"); - if (cmpd_name.isEmpty() && !lin.ready()) - return; - lin.readLine(); - lin.readLine(); - - if (bskip) - LOG_DPRINTLN("SDFMolReader> skipping compound <%s>", cmpd_name.c_str()); - else - LOG_DPRINTLN("SDFMolReader> reading compound <%s>", cmpd_name.c_str()); - - LString str_ct = lin.readLine(); - - LString str_natom = str_ct.substr(0,3); - LString str_nbond = str_ct.substr(3,3); - LString str_ver = str_ct.substr(33,6); - - if (!str_ver.equals(" V2000")) { - LString msg = LString::format("Unsupported MOL/SDF version <%s>", str_ver.c_str()); - MB_THROW(SDFFormatException, msg); - } - - int natom; - if (!str_natom.toInt(&natom)) { - LString msg = LString::format("Invalid natom <%s> in CT line", str_natom.c_str()); - MB_THROW(SDFFormatException, msg); - } - int nbond; - if (!str_nbond.toInt(&nbond)) { - LString msg = LString::format("Invalid nbond <%s> in CT line", str_nbond.c_str()); - MB_THROW(SDFFormatException, msg); - } - - //LOG_DPRINTLN("SDFMolReader> natom: %d", natom); - //LOG_DPRINTLN("SDFMolReader> nbond: %d", nbond); - - int i; - LString str, sx, sy, sz, satom, aname; - double xx, yy, zz; - - std::vector elem_counts(ElemSym::MAX, 0); - std::map atommap; - - for (i=0; i", sx.c_str()); - - if (!sx.toDouble(&xx)) - MB_THROW(SDFFormatException, "invalid atom line (x coord):"+str); - if (!sy.toDouble(&yy)) - MB_THROW(SDFFormatException, "invalid atom line (y coord):"+str); - if (!sz.toDouble(&zz)) - MB_THROW(SDFFormatException, "invalid atom line (z coord):"+str); - - ElemID eleid = ElemSym::str2SymID(satom); - elem_counts[eleid] += 1; - aname = LString::format("%s%d", ElemSym::symID2Str(eleid).c_str(), elem_counts[eleid]); - - // LOG_DPRINTLN("Atom: %f, %f, %f, <%s> %d", xx, yy, zz, aname.c_str(), eleid); + LString cmpd_name = lin.readLine(); + cmpd_name = cmpd_name.trim(" \t\r\n"); + if (cmpd_name.isEmpty() && !lin.ready()) return; + lin.readLine(); + lin.readLine(); + + if (bskip) + LOG_DPRINTLN("SDFMolReader> skipping compound <%s>", cmpd_name.c_str()); + else + LOG_DPRINTLN("SDFMolReader> reading compound <%s>", cmpd_name.c_str()); + + LString str_ct = lin.readLine(); + + LString str_natom = str_ct.substr(0, 3); + LString str_nbond = str_ct.substr(3, 3); + LString str_ver = str_ct.substr(33, 6); + + if (!str_ver.equals(" V2000")) { + LString msg = + LString::format("Unsupported MOL/SDF version <%s>", str_ver.c_str()); + MB_THROW(SDFFormatException, msg); + } - if (!bskip) { - MolAtomPtr pAtom = MolAtomPtr(MB_NEW MolAtom()); - pAtom->setParentUID(m_pMol->getUID()); - pAtom->setName(aname); - pAtom->setElement(eleid); - - pAtom->setChainName(m_sCurrChName); - pAtom->setResIndex(m_nCurrResid); - pAtom->setResName(cmpd_name); - - pAtom->setPos(Vector4D(xx,yy,zz)); - pAtom->setBfac(0.0); - pAtom->setOcc(1.0); - - int naid = m_pMol->appendAtom(pAtom); - if (naid<0) - MB_THROW(SDFFormatException, "invalid SDF format, appendAtom() failed!!"); - - atommap.insert(std::pair(i, naid)); - m_nReadAtoms++; + int natom; + if (!str_natom.toInt(&natom)) { + LString msg = + LString::format("Invalid natom <%s> in CT line", str_natom.c_str()); + MB_THROW(SDFFormatException, msg); } - } - - int natm1, natm2, nbont; - int natm_id1, natm_id2; - std::map::const_iterator iter; - - for (i=0; i in CT line", str_nbond.c_str()); + MB_THROW(SDFFormatException, msg); } - if (!str.substr(3, 3).toInt(&natm2)) { - MB_THROW(SDFFormatException, "Invalid bond line (atom2)"); + + // LOG_DPRINTLN("SDFMolReader> natom: %d", natom); + // LOG_DPRINTLN("SDFMolReader> nbond: %d", nbond); + + int i; + LString str, sx, sy, sz, satom, aname; + double xx, yy, zz; + + std::vector elem_counts(ElemSym::MAX, 0); + std::map atommap; + + for (i = 0; i < natom; ++i) { + str = lin.readLine(); + if (str.trim().isEmpty()) MB_THROW(SDFFormatException, "Atom lines too short"); + + sx = str.substr(0, 10); + sy = str.substr(10, 10); + sz = str.substr(20, 10); + satom = str.substr(31, 3).trim(); + // LOG_DPRINTLN("<%s>", sx.c_str()); + + if (!sx.toDouble(&xx)) + MB_THROW(SDFFormatException, "invalid atom line (x coord):" + str); + if (!sy.toDouble(&yy)) + MB_THROW(SDFFormatException, "invalid atom line (y coord):" + str); + if (!sz.toDouble(&zz)) + MB_THROW(SDFFormatException, "invalid atom line (z coord):" + str); + + ElemID eleid = ElemSym::str2SymID(satom); + elem_counts[eleid] += 1; + aname = LString::format("%s%d", ElemSym::symID2Str(eleid).c_str(), + elem_counts[eleid]); + + // LOG_DPRINTLN("Atom: %f, %f, %f, <%s> %d", xx, yy, zz, aname.c_str(), eleid); + + if (!bskip) { + MolAtomPtr pAtom = MolAtomPtr(MB_NEW MolAtom()); + pAtom->setParentUID(m_pMol->getUID()); + pAtom->setName(aname); + pAtom->setElement(eleid); + + pAtom->setChainName(m_sCurrChName); + pAtom->setResIndex(m_nCurrResid); + pAtom->setResName(cmpd_name); + + pAtom->setPos(Vector4D(xx, yy, zz)); + pAtom->setBfac(0.0); + pAtom->setOcc(1.0); + + int naid = m_pMol->appendAtom(pAtom); + if (naid < 0) + MB_THROW(SDFFormatException, + "invalid SDF format, appendAtom() failed!!"); + + atommap.insert(std::pair(i, naid)); + m_nReadAtoms++; + } } - if (!str.substr(6, 3).toInt(&nbont)) { - MB_THROW(SDFFormatException, "Invalid bond line (bond type)"); + + int natm1, natm2, nbont; + int natm_id1, natm_id2; + std::map::const_iterator iter; + + for (i = 0; i < nbond; ++i) { + str = lin.readLine(); + if (str.trim().isEmpty()) MB_THROW(SDFFormatException, "Bond lines too short"); + + if (!str.substr(0, 3).toInt(&natm1)) { + MB_THROW(SDFFormatException, "Invalid bond line (atom1)"); + } + if (!str.substr(3, 3).toInt(&natm2)) { + MB_THROW(SDFFormatException, "Invalid bond line (atom2)"); + } + if (!str.substr(6, 3).toInt(&nbont)) { + MB_THROW(SDFFormatException, "Invalid bond line (bond type)"); + } + + if (!bskip) { + iter = atommap.find(natm1 - 1); + if (iter == atommap.end()) + MB_THROW(SDFFormatException, + "Invalid bond line (bond atom1 not found)"); + natm_id1 = iter->second; + + iter = atommap.find(natm2 - 1); + if (iter == atommap.end()) + MB_THROW(SDFFormatException, + "Invalid bond line (bond atom2 not found)"); + natm_id2 = iter->second; + + MolBond *pB = m_pMol->makeBond(natm_id1, natm_id2, true); + if (pB == NULL) MB_THROW(SDFFormatException, "makeBond failed"); + + if (nbont == 1) + pB->setType(MolBond::SINGLE); + else if (nbont == 2) + pB->setType(MolBond::DOUBLE); + else if (nbont == 3) + pB->setType(MolBond::TRIPLE); + else if (nbont >= 4) + pB->setType(MolBond::DELOC); + + m_nReadBonds++; + } + + // LOG_DPRINTLN("bond %d<-->%d: %d", natm_id1, natm_id2, nbont); } + // Set noautogen prop to this residue, + // to avoid topology autogen, when saved to and loaded from the qdf stream. if (!bskip) { - iter = atommap.find(natm1-1); - if (iter==atommap.end()) - MB_THROW(SDFFormatException, "Invalid bond line (bond atom1 not found)"); - natm_id1 = iter->second; - - iter = atommap.find(natm2-1); - if (iter==atommap.end()) - MB_THROW(SDFFormatException, "Invalid bond line (bond atom2 not found)"); - natm_id2 = iter->second; - - MolBond *pB = m_pMol->makeBond(natm_id1, natm_id2, true); - if (pB==NULL) - MB_THROW(SDFFormatException, "makeBond failed"); - - if (nbont==1) - pB->setType(MolBond::SINGLE); - else if (nbont==2) - pB->setType(MolBond::DOUBLE); - else if (nbont==3) - pB->setType(MolBond::TRIPLE); - else if (nbont>=4) - pB->setType(MolBond::DELOC); - - m_nReadBonds++; + iter = atommap.find(0); + if (iter != atommap.end()) { + int aid0 = iter->second; + MolAtomPtr pA = m_pMol->getAtom(aid0); + if (!pA.isnull()) { + MolResiduePtr pRes = pA->getParentResidue(); + if (!pRes.isnull()) { + pRes->setPropStr("noautogen", "true"); + } + } + } } - - //LOG_DPRINTLN("bond %d<-->%d: %d", natm_id1, natm_id2, nbont); - } - - // Set noautogen prop to this residue, - // to avoid topology autogen, when saved to and loaded from the qdf stream. - if (!bskip) { - iter = atommap.find(0); - if (iter!=atommap.end()) { - int aid0 = iter->second; - MolAtomPtr pA = m_pMol->getAtom(aid0); - if (!pA.isnull()) { - MolResiduePtr pRes = pA->getParentResidue(); - if (!pRes.isnull()) { - pRes->setPropStr("noautogen", "true"); - } - } - } - } - } diff --git a/src/modules/importers/SDFMolWriter.cpp b/src/modules/importers/SDFMolWriter.cpp index b678dd9e..6c045660 100644 --- a/src/modules/importers/SDFMolWriter.cpp +++ b/src/modules/importers/SDFMolWriter.cpp @@ -117,7 +117,7 @@ void SDFMolWriter::writeResidue(const molstr::MolResiduePtr &presid, } prs.formatln("SDFile Generated by CueMol2"); - prs.formatln(""); + prs.formatln(" RDKit 3D"); prs.formatln(""); int natoms = presid->getAtomSize(); @@ -128,22 +128,31 @@ void SDFMolWriter::writeResidue(const molstr::MolResiduePtr &presid, // 59 60 0 0 0 0 0 0 0 0999 V2000 prs.formatln("% 3d% 3d 0 0 0 0 0 0 0 0999 V2000", natoms, nbonds); + // make atom order + std::set atomset; + for (auto iter = presid->atomBegin(); iter != presid->atomEnd(); ++iter) { + auto aid = iter->second; + atomset.insert(aid); + } + // 15.0880 10.7980 23.5470 N 0 3 0 4 0 4 int index = 1; std::map idmap; - for (auto iter = presid->atomBegin(); iter != presid->atomEnd(); ++iter, ++index) { - auto patom = iter->second; - auto aid = patom->getID(); + // for (auto iter = presid->atomBegin(); iter != presid->atomEnd(); ++iter, ++index) { + for (int aid: atomset) { + // auto aid = iter->second; + auto patom = m_pMol->getAtom(aid); idmap.insert(std::pair(aid, index)); auto pos = patom->getPos(); auto elem = patom->getElementName(); // prs.formatln("%d", index); prs.formatln("%10.4f%10.4f%10.4f%3s 0 0 0 0 0 0 0 0 0 0 0 0", pos.x(), pos.y(), pos.z(), elem.c_str()); + ++index; } for (int bondid: bondvec) { - auto pbond = iter->second; + auto pbond = m_pMol->getBond(bondid); auto aid1 = pbond->getAtom1(); auto aid2 = pbond->getAtom2(); auto ntype = pbond->getType(); @@ -168,9 +177,10 @@ void SDFMolWriter::writeResidue(const molstr::MolResiduePtr &presid, // 2 3 1 0 prs.formatln("%3d%3d%3d 0", id1, id2, itype); } - */ - prs.println("M END"); + + prs.println("M END"); prs.println("$$$$"); + } diff --git a/src/modules/molstr/MolAtom.cpp b/src/modules/molstr/MolAtom.cpp index 4c59e482..87a8aaa9 100755 --- a/src/modules/molstr/MolAtom.cpp +++ b/src/modules/molstr/MolAtom.cpp @@ -33,7 +33,7 @@ MolAtom::MolAtom() m_pXformMat = NULL; - m_charge = 0.0; + // m_charge = 0.0; // m_radius = 0.0; } @@ -58,7 +58,7 @@ MolAtom::MolAtom(const MolAtom &src) m_occ = src.m_occ; m_confid = src.m_confid; - m_charge = src.m_charge; + // m_charge = src.m_charge; // m_radius = src.m_radius; m_paib = NULL; diff --git a/src/modules/molstr/MolAtom.hpp b/src/modules/molstr/MolAtom.hpp index 3381d2a9..adead5a7 100755 --- a/src/modules/molstr/MolAtom.hpp +++ b/src/modules/molstr/MolAtom.hpp @@ -83,8 +83,8 @@ namespace molstr { /// Cached transformation matrix qlib::Matrix4D *m_pXformMat; - /// formal charge - double m_charge; + // /// formal charge + // double m_charge; public: @@ -182,15 +182,6 @@ namespace molstr { m_occ = occup; } - double getCharge() const - { - return m_charge; - } - void setCharge(double value) - { - m_charge = value; - } - /// Get canonical name of atom in the topology definition const LString &getCName() const { return m_canonName; diff --git a/src/modules/molstr/MolAtom.qif b/src/modules/molstr/MolAtom.qif index 525e8dfa..a7986eed 100755 --- a/src/modules/molstr/MolAtom.qif +++ b/src/modules/molstr/MolAtom.qif @@ -47,6 +47,8 @@ runtime_class MolAtom string getPropTypeName(string name); integer getAtomPropInt(string name); void setAtomPropInt(string name, integer value); + real getAtomPropReal(string name); + void setAtomPropReal(string name, real value); //// diff --git a/src/modules/molstr/MolCoord.cpp b/src/modules/molstr/MolCoord.cpp index 3f66781b..7fd72cec 100644 --- a/src/modules/molstr/MolCoord.cpp +++ b/src/modules/molstr/MolCoord.cpp @@ -78,10 +78,18 @@ MolResiduePtr MolCoord::getResidScr(const LString &chain, const LString &sresid) MolAtomPtr MolCoord::getAtom(int atomid) const { - AtomPool::const_iterator iter = m_atomPool.find(atomid); - if (iter==m_atomPool.end()) - return MolAtomPtr(); - return iter->second; + AtomPool::const_iterator iter = m_atomPool.find(atomid); + if (iter==m_atomPool.end()) + return MolAtomPtr(); + return iter->second; +} + +MolBond *MolCoord::getBond(int bondid) const +{ + BondPool::const_iterator iter = m_bondPool.find(bondid); + if (iter==m_bondPool.end()) + return NULL; + return iter->second; } MolAtomPtr MolCoord::getAtom(const LString &chain, ResidIndex resid, diff --git a/src/modules/molstr/MolCoord.hpp b/src/modules/molstr/MolCoord.hpp index 88ef42bf..7e32e126 100644 --- a/src/modules/molstr/MolCoord.hpp +++ b/src/modules/molstr/MolCoord.hpp @@ -191,6 +191,8 @@ namespace molstr { /// Convert from (persistent) string representation to aid int fromStrAID(const LString &strid) const; + MolBond *getBond(int bondid) const; + ///////////////////////////////////////////////////// // chain operations diff --git a/src/modules/molstr/MolCoord.qif b/src/modules/molstr/MolCoord.qif index 6c27ebc9..6c0de374 100644 --- a/src/modules/molstr/MolCoord.qif +++ b/src/modules/molstr/MolCoord.qif @@ -50,6 +50,8 @@ runtime_class MolCoord extends Object integer getAtomSize(); integer getAtomSelSize(object psel) => getAtomSize; + integer getBondSize(); + /// Encode model ID into the chain name string encodeModelInChain(string chain_name, integer imodel); diff --git a/src/modules/molstr/MolResidue.hpp b/src/modules/molstr/MolResidue.hpp index 540fc97e..33e5fea8 100644 --- a/src/modules/molstr/MolResidue.hpp +++ b/src/modules/molstr/MolResidue.hpp @@ -67,7 +67,7 @@ namespace molstr { public: typedef atomdata_t::const_iterator AtomCursor; - typedef std::set::const_iterator BondCursor; + // typedef std::set::const_iterator BondCursor; public: diff --git a/tests/importers_tests/test_sdf_reader.py b/tests/importers_tests/test_sdf_reader.py index 2085ad07..cc2e95f8 100644 --- a/tests/importers_tests/test_sdf_reader.py +++ b/tests/importers_tests/test_sdf_reader.py @@ -14,7 +14,11 @@ def test_sdf_reader(test_data_path): reader.detach() natoms = obj.getAtomSize() print(f"{natoms=}") - assert natoms == 59 + assert natoms == 37 + + nbonds = obj.getBondSize() + print(f"{nbonds=}") + assert nbonds == 40 @pytest.fixture def mol_from_sdf(test_data_path): diff --git a/tests/test_data/test1.sdf b/tests/test_data/test1.sdf index 443f224f..0f10ecca 100644 --- a/tests/test_data/test1.sdf +++ b/tests/test_data/test1.sdf @@ -1,140 +1,83 @@ -10gs_ligand -Created by X-TOOL on Fri Nov 18 12:16:16 2016 - 59 60 0 0 0 0 0 0 0 0999 V2000 - 15.0880 10.7980 23.5470 N 0 3 0 4 0 4 - 15.0100 9.9870 24.7920 C 0 0 0 2 0 4 - 16.1150 8.9240 24.8300 C 0 5 0 1 0 3 - 16.5200 8.5150 25.9400 O 0 0 0 1 0 1 - 13.6350 9.3270 24.9080 C 0 0 0 3 0 4 - 13.3940 8.7080 26.2710 C 0 0 0 3 0 4 - 12.0450 8.0460 26.4020 C 0 0 0 1 0 3 - 11.2930 7.9360 25.4350 O 0 0 0 1 0 1 - 16.5780 8.5240 23.7440 O 0 0 0 1 0 1 - 11.7260 7.6420 27.6280 N 0 0 0 2 0 3 - 10.4720 6.9670 27.9340 C 0 0 0 2 0 4 - 10.7260 5.4840 28.2060 C 0 0 0 3 0 4 - 11.2910 4.5240 26.8100 S 0 0 0 1 0 2 - 9.7290 3.8040 26.2620 C 0 0 0 3 0 4 - 8.9300 3.1710 27.3700 C 0 0 0 1 0 3 - 7.6400 3.6140 27.6500 C 0 0 0 2 0 3 - 9.4640 2.1350 28.1330 C 0 0 0 2 0 3 - 6.8930 3.0370 28.6730 C 0 0 0 2 0 3 - 8.7230 1.5500 29.1610 C 0 0 0 2 0 3 - 7.4370 2.0010 29.4300 C 0 0 0 2 0 3 - 9.8340 7.5500 29.1800 C 0 0 0 1 0 3 - 10.5220 8.0230 30.0840 O 0 0 0 1 0 1 - 8.5120 7.4680 29.2290 N 0 0 0 2 0 3 - 7.7400 7.9330 30.3660 C 0 0 0 2 0 4 - 6.5550 7.0620 30.6330 C 0 0 0 1 0 3 - 5.3300 7.3150 30.0270 C 0 0 0 2 0 3 - 6.6830 5.9410 31.4410 C 0 0 0 2 0 3 - 4.2500 6.4590 30.2200 C 0 0 0 2 0 3 - 5.6110 5.0810 31.6400 C 0 0 0 2 0 3 - 4.3920 5.3390 31.0270 C 0 0 0 2 0 3 - 7.4520 9.4330 30.3540 C 0 5 0 1 0 3 - 7.1160 9.9570 31.4330 O 0 0 0 1 0 1 - 7.5690 10.0680 29.2840 O 0 0 0 1 0 1 - 14.3522 11.4870 23.5482 H 0 0 0 1 0 1 - 14.9824 10.1951 22.7461 H 0 0 0 1 0 1 - 15.9821 11.2614 23.5033 H 0 0 0 1 0 1 - 15.1478 10.6593 25.6517 H 0 0 0 1 0 1 - 13.5582 8.5382 24.1452 H 0 0 0 1 0 1 - 12.8628 10.0891 24.7265 H 0 0 0 1 0 1 - 13.4683 9.4998 27.0309 H 0 0 0 1 0 1 - 14.1719 7.9516 26.4522 H 0 0 0 1 0 1 - 12.3741 7.8085 28.3711 H 0 0 0 1 0 1 - 9.7864 7.0776 27.0810 H 0 0 0 1 0 1 - 9.7847 5.0400 28.5622 H 0 0 0 1 0 1 - 11.4878 5.4113 28.9961 H 0 0 0 1 0 1 - 9.9478 3.0333 25.5083 H 0 0 0 1 0 1 - 9.1210 4.5998 25.8070 H 0 0 0 1 0 1 - 7.2118 4.4189 27.0636 H 0 0 0 1 0 1 - 10.4667 1.7790 27.9259 H 0 0 0 1 0 1 - 5.8903 3.3927 28.8804 H 0 0 0 1 0 1 - 9.1502 0.7453 29.7483 H 0 0 0 1 0 1 - 6.8580 1.5482 30.2268 H 0 0 0 1 0 1 - 8.0270 7.0694 28.4506 H 0 0 0 1 0 1 - 8.3977 7.7875 31.2356 H 0 0 0 1 0 1 - 5.2146 8.1893 29.3967 H 0 0 0 1 0 1 - 7.6317 5.7341 31.9229 H 0 0 0 1 0 1 - 3.2999 6.6666 29.7411 H 0 0 0 1 0 1 - 5.7257 4.2088 32.2735 H 0 0 0 1 0 1 - 3.5543 4.6679 31.1783 H 0 0 0 1 0 1 - 2 1 1 0 0 2 - 2 5 1 0 0 2 - 2 3 1 0 0 2 - 3 4 2 0 0 2 - 3 9 2 0 0 2 - 5 6 1 0 0 2 - 6 7 1 0 0 2 - 7 8 2 0 0 2 - 7 10 1 0 0 2 - 10 11 1 0 0 2 - 11 12 1 0 0 2 - 11 21 1 0 0 2 - 12 13 1 0 0 2 - 13 14 1 0 0 2 - 14 15 1 0 0 2 - 15 16 4 0 0 1 - 15 17 4 0 0 1 - 16 18 4 0 0 1 - 17 19 4 0 0 1 - 18 20 4 0 0 1 - 19 20 4 0 0 1 - 21 22 2 0 0 2 - 21 23 1 0 0 2 - 23 24 1 0 0 2 - 24 25 1 0 0 2 - 24 31 1 0 0 2 - 25 26 4 0 0 1 - 25 27 4 0 0 1 - 26 28 4 0 0 1 - 27 29 4 0 0 1 - 28 30 4 0 0 1 - 29 30 4 0 0 1 - 31 32 2 0 0 2 - 31 33 2 0 0 2 - 1 34 1 0 0 2 - 1 35 1 0 0 2 - 1 36 1 0 0 2 - 2 37 1 0 0 2 - 5 38 1 0 0 2 - 5 39 1 0 0 2 - 6 40 1 0 0 2 - 6 41 1 0 0 2 - 10 42 1 0 0 2 - 11 43 1 0 0 2 - 12 44 1 0 0 2 - 12 45 1 0 0 2 - 14 46 1 0 0 2 - 14 47 1 0 0 2 - 16 48 1 0 0 2 - 17 49 1 0 0 2 - 18 50 1 0 0 2 - 19 51 1 0 0 2 - 20 52 1 0 0 2 - 23 53 1 0 0 2 - 24 54 1 0 0 2 - 26 55 1 0 0 2 - 27 56 1 0 0 2 - 28 57 1 0 0 2 - 29 58 1 0 0 2 - 30 59 1 0 0 2 -M END -> -C23H26N3O6S - -> -472.3 - -> -10 - -> -11 - -> --0.40 + RDKit 3D + 37 40 0 0 0 0 0 0 0 0999 V2000 + -0.8243 3.6640 -0.7777 N 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8548 2.2403 -0.7153 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9515 1.6029 -0.1945 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0329 0.2387 -0.1075 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2476 -0.3950 0.4670 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.2738 0.3914 1.0225 C 0 0 0 0 0 0 0 0 0 0 0 0 + -5.4084 -0.2278 1.5511 C 0 0 0 0 0 0 0 0 0 0 0 0 + -5.5186 -1.6177 1.5241 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4862 -2.3713 0.9652 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3895 -1.7502 0.4555 N 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9320 -0.4872 -0.5807 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5933 -1.8300 -0.6509 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6467 -1.9349 -1.1916 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0844 -0.7124 -1.4710 N 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4140 -0.4161 -1.9960 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.3742 -0.1843 -0.8601 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9239 -1.2840 -0.1798 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.7875 -1.0858 0.9015 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.1022 0.2099 1.3177 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5516 1.3100 0.6551 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6867 1.1189 -0.4278 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1593 2.2029 -1.0510 F 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1432 0.1673 -1.0961 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2093 1.5171 -1.1720 N 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6331 4.2255 -0.4302 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0077 4.1580 -1.1701 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.2018 1.4713 1.0515 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.2015 0.3701 1.9813 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.3945 -2.1059 1.9314 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.5559 -3.4504 0.9354 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1784 -2.6770 -0.3244 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7657 -1.2690 -2.6165 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3668 0.4681 -2.6678 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6812 -2.2942 -0.4859 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2099 -1.9373 1.4191 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.7699 0.3617 2.1558 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.7939 2.3125 0.9836 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 2 0 + 3 4 1 0 + 4 5 1 0 + 5 6 2 0 + 6 7 1 0 + 7 8 2 0 + 8 9 1 0 + 9 10 2 0 + 4 11 2 0 + 11 12 1 0 + 12 13 2 0 + 13 14 1 0 + 14 15 1 0 + 15 16 1 0 + 16 17 2 0 + 17 18 1 0 + 18 19 2 0 + 19 20 1 0 + 20 21 2 0 + 21 22 1 0 + 14 23 1 0 + 23 24 2 0 + 24 2 1 0 + 10 5 1 0 + 23 11 1 0 + 21 16 1 0 + 1 25 1 0 + 1 26 1 0 + 6 27 1 0 + 7 28 1 0 + 8 29 1 0 + 9 30 1 0 + 12 31 1 0 + 15 32 1 0 + 15 33 1 0 + 17 34 1 0 + 18 35 1 0 + 19 36 1 0 + 20 37 1 0 +M END $$$$ diff --git a/tests/test_data/test_chg1.sdf b/tests/test_data/test_chg1.sdf new file mode 100644 index 00000000..c1d40910 --- /dev/null +++ b/tests/test_data/test_chg1.sdf @@ -0,0 +1,128 @@ + + RDKit 3D + + 59 62 0 0 0 0 0 0 0 0999 V2000 + -4.7692 -5.4739 -0.3576 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.7232 -3.9953 -0.1657 C 0 0 0 0 0 0 0 0 0 0 0 0 + -5.8011 -3.1849 0.1252 C 0 0 0 0 0 0 0 0 0 0 0 0 + -5.2736 -1.9089 0.1319 C 0 0 0 0 0 0 0 0 0 0 0 0 + -6.0616 -0.6624 0.4002 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.9494 -1.9782 -0.1557 N 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0027 -0.8945 -0.2631 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6633 -1.1464 -0.5969 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7698 -0.0683 -0.6830 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6068 -0.2563 -1.0447 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3933 -1.4002 -0.6736 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9633 -2.2288 0.1739 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7753 -1.5759 -1.2337 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8608 -1.1229 -0.2359 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2556 -1.5023 -0.7639 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3528 -0.9971 0.1830 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.2574 0.4662 0.3735 N 0 0 0 0 0 4 0 0 0 0 0 0 + 7.3401 0.9316 1.2563 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.9444 0.8388 0.9424 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7972 0.3953 0.0238 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2302 1.1935 -0.4767 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5286 1.4289 -0.1560 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.9889 2.8036 0.0713 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.2768 3.1868 0.4007 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.2407 4.5604 0.5159 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.9360 4.9239 0.2463 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4012 6.3161 0.2640 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.2120 3.8595 -0.0120 O 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3962 0.3892 -0.0493 N 0 0 0 0 0 0 0 0 0 0 0 0 + -3.6178 -3.2479 -0.3444 N 0 0 0 0 0 0 0 0 0 0 0 0 + -5.0740 -5.7022 -1.4000 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.7675 -5.9125 -0.1657 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.4997 -5.9266 0.3455 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.8273 -3.4824 0.2982 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.6520 -0.1425 1.2910 H 0 0 0 0 0 0 0 0 0 0 0 0 + -7.1256 -0.9109 0.5990 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.0181 0.0081 -0.4831 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3341 -2.1536 -0.8122 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0933 0.5396 -1.5157 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.9173 -2.6528 -1.4723 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8712 -1.0126 -2.1877 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6988 -1.6574 0.7282 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.3364 -2.6079 -0.8513 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4140 -1.0638 -1.7743 H 0 0 0 0 0 0 0 0 0 0 0 0 + 7.3397 -1.2602 -0.2599 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.2673 -1.5270 1.1596 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3762 0.9410 -0.5538 H 0 0 0 0 0 0 0 0 0 0 0 0 + 8.3320 0.6855 0.8171 H 0 0 0 0 0 0 0 0 0 0 0 0 + 7.2952 2.0365 1.3763 H 0 0 0 0 0 0 0 0 0 0 0 0 + 7.2687 0.4604 2.2621 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8073 0.3783 1.9479 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8818 1.9436 1.0646 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8613 0.9508 -0.9379 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8339 0.6577 0.5116 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.1375 2.5464 0.5425 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.0647 5.2149 0.7675 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0576 6.9793 -0.3374 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3634 6.6860 1.3097 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3779 6.3347 -0.1658 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 4 5 1 0 + 4 6 1 0 + 6 7 1 0 + 7 8 2 0 + 8 9 1 0 + 9 10 1 0 + 10 11 1 0 + 11 12 2 0 + 11 13 1 0 + 13 14 1 0 + 14 15 1 0 + 15 16 1 0 + 16 17 1 0 + 17 18 1 0 + 17 19 1 0 + 19 20 1 0 + 9 21 2 0 + 21 22 1 0 + 22 23 1 0 + 23 24 2 0 + 24 25 1 0 + 25 26 2 0 + 26 27 1 0 + 26 28 1 0 + 22 29 2 0 + 6 30 1 0 + 30 2 2 0 + 29 7 1 0 + 20 14 1 0 + 28 23 1 0 + 1 31 1 0 + 1 32 1 0 + 1 33 1 0 + 3 34 1 0 + 5 35 1 0 + 5 36 1 0 + 5 37 1 0 + 8 38 1 0 + 10 39 1 0 + 13 40 1 0 + 13 41 1 0 + 14 42 1 0 + 15 43 1 0 + 15 44 1 0 + 16 45 1 0 + 16 46 1 0 + 17 47 1 0 + 18 48 1 0 + 18 49 1 0 + 18 50 1 0 + 19 51 1 0 + 19 52 1 0 + 20 53 1 0 + 20 54 1 0 + 24 55 1 0 + 25 56 1 0 + 27 57 1 0 + 27 58 1 0 + 27 59 1 0 +M CHG 1 17 1 +M END +$$$$