From b3681cf4bd3d33eef5d38bd8917ce8818505047d Mon Sep 17 00:00:00 2001 From: Geoff Hutchison Date: Mon, 28 Oct 2024 15:02:37 -0400 Subject: [PATCH] Add an explicit sdf format which writes properties Signed-off-by: Geoff Hutchison --- avogadro/io/CMakeLists.txt | 14 ++-- avogadro/io/fileformatmanager.cpp | 1 + avogadro/io/mdlformat.cpp | 17 +++-- avogadro/io/mdlformat.h | 10 ++- avogadro/io/sdfformat.cpp | 41 ++++++++++++ avogadro/io/sdfformat.h | 67 +++++++++++++++++++ .../qtplugins/forcefield/scriptenergy.cpp | 7 +- avogadro/qtplugins/forcefield/scriptenergy.h | 3 +- .../scriptcharges/scriptchargemodel.cpp | 7 +- .../scriptcharges/scriptchargemodel.h | 3 +- .../scriptfileformats/fileformatscript.cpp | 11 ++- .../scriptfileformats/fileformatscript.h | 4 +- 12 files changed, 164 insertions(+), 21 deletions(-) create mode 100644 avogadro/io/sdfformat.cpp create mode 100644 avogadro/io/sdfformat.h diff --git a/avogadro/io/CMakeLists.txt b/avogadro/io/CMakeLists.txt index 3afa3ec267..a0014a48fe 100644 --- a/avogadro/io/CMakeLists.txt +++ b/avogadro/io/CMakeLists.txt @@ -9,12 +9,13 @@ avogadro_headers(IO fileformatmanager.h gromacsformat.h mdlformat.h - vaspformat.h + lammpsformat.h pdbformat.h - xyzformat.h + sdfformat.h trrformat.h turbomoleformat.h - lammpsformat.h + vaspformat.h + xyzformat.h ) target_sources(IO PRIVATE @@ -25,13 +26,14 @@ target_sources(IO PRIVATE fileformat.cpp fileformatmanager.cpp gromacsformat.cpp + lammpsformat.cpp mdlformat.cpp - vaspformat.cpp pdbformat.cpp - xyzformat.cpp + sdfformat.cpp trrformat.cpp turbomoleformat.cpp - lammpsformat.cpp + vaspformat.cpp + xyzformat.cpp ) if(USE_HDF5) diff --git a/avogadro/io/fileformatmanager.cpp b/avogadro/io/fileformatmanager.cpp index ec19127dca..96c8307dbb 100644 --- a/avogadro/io/fileformatmanager.cpp +++ b/avogadro/io/fileformatmanager.cpp @@ -15,6 +15,7 @@ #include "lammpsformat.h" #include "mdlformat.h" #include "pdbformat.h" +#include "sdfformat.h" #include "trrformat.h" #include 
"turbomoleformat.h" #include "vaspformat.h" diff --git a/avogadro/io/mdlformat.cpp b/avogadro/io/mdlformat.cpp index dc5f9cf309..2bea8f6d8a 100644 --- a/avogadro/io/mdlformat.cpp +++ b/avogadro/io/mdlformat.cpp @@ -182,7 +182,7 @@ bool MdlFormat::read(std::istream& in, Core::Molecule& mol) } // Apply charges. - for (auto & i : chargeList) { + for (auto& i : chargeList) { size_t index = i.first; signed int charge = i.second; mol.setFormalCharge(index, charge); @@ -260,13 +260,23 @@ bool MdlFormat::write(std::ostream& out, const Core::Molecule& mol) << " 0 0 0 0\n"; } // Properties block. - for (auto & i : chargeList) { + for (auto& i : chargeList) { Index atomIndex = i.first; signed int atomCharge = i.second; out << "M CHG 1 " << setw(3) << std::right << atomIndex + 1 << " " << setw(3) << atomCharge << "\n"; } + // TODO: isotopes, etc. out << "M END\n"; + // Data block + if (m_writeProperties) { + const auto dataMap = mol.dataMap(); + for (const auto& key : dataMap.names()) { + out << "> <" << key << ">\n"; + out << dataMap.value(key).toString() << "\n"; + out << "\n"; // empty line between data blocks + } + } if (isMode(FileFormat::MultiMolecule)) out << "$$$$\n"; @@ -278,7 +288,6 @@ std::vector MdlFormat::fileExtensions() const { std::vector ext; ext.emplace_back("mol"); - ext.emplace_back("sdf"); return ext; } @@ -289,4 +298,4 @@ std::vector MdlFormat::mimeTypes() const return mime; } -} // namespace Avogadro +} // namespace Avogadro::Io diff --git a/avogadro/io/mdlformat.h b/avogadro/io/mdlformat.h index 4aebbcf963..c887221f61 100644 --- a/avogadro/io/mdlformat.h +++ b/avogadro/io/mdlformat.h @@ -40,7 +40,8 @@ class AVOGADROIO_EXPORT MdlFormat : public FileFormat std::string specificationUrl() const override { - return "http://help.accelrysonline.com/ulm/onelab/1.0/content/ulm_pdfs/direct/" + return "http://help.accelrysonline.com/ulm/onelab/1.0/content/ulm_pdfs/" + "direct/" "reference/ctfileformats2016.pdf"; /* for previous (2011) version, see: 
https://web.archive.org/web/20180329184712/http://download.accelrys.com/freeware/ctfile-formats/ctfile-formats.zip @@ -52,9 +53,12 @@ class AVOGADROIO_EXPORT MdlFormat : public FileFormat bool read(std::istream& in, Core::Molecule& molecule) override; bool write(std::ostream& out, const Core::Molecule& molecule) override; + +protected: + bool m_writeProperties = false; }; -} // end Io namespace -} // end Avogadro namespace +} // namespace Io +} // namespace Avogadro #endif // AVOGADRO_IO_MDLFORMAT_H diff --git a/avogadro/io/sdfformat.cpp b/avogadro/io/sdfformat.cpp new file mode 100644 index 0000000000..4238a13d9c --- /dev/null +++ b/avogadro/io/sdfformat.cpp @@ -0,0 +1,41 @@ +/****************************************************************************** + This source file is part of the Avogadro project. + This source code is released under the 3-Clause BSD License, (see "LICENSE"). +******************************************************************************/ + +#include "sdfformat.h" + +namespace Avogadro::Io { + +SdfFormat::SdfFormat() : MdlFormat() +{ + m_writeProperties = true; +} + +SdfFormat::~SdfFormat() {} + +bool SdfFormat::read(std::istream& in, Core::Molecule& mol) +{ + return MdlFormat::read(in, mol); +} + +bool SdfFormat::write(std::ostream& out, const Core::Molecule& mol) +{ + return MdlFormat::write(out, mol); +} + +std::vector<std::string> SdfFormat::fileExtensions() const +{ + std::vector<std::string> ext; + ext.emplace_back("sdf"); + return ext; +} + +std::vector<std::string> SdfFormat::mimeTypes() const +{ + std::vector<std::string> mime; + mime.emplace_back("chemical/x-mdl-molfile"); + return mime; +} + +} // namespace Avogadro::Io diff --git a/avogadro/io/sdfformat.h b/avogadro/io/sdfformat.h new file mode 100644 index 0000000000..8d9119e0ae --- /dev/null +++ b/avogadro/io/sdfformat.h @@ -0,0 +1,67 @@ +/****************************************************************************** + This source file is part of the Avogadro project. 
+ This source code is released under the 3-Clause BSD License, (see "LICENSE"). +******************************************************************************/ + +#ifndef AVOGADRO_IO_SDFFORMAT_H +#define AVOGADRO_IO_SDFFORMAT_H + +#include "fileformat.h" +#include "mdlformat.h" + +namespace Avogadro { +namespace Io { + +/** + * @class SdfFormat sdfformat.h + * @brief Implementation of the generic SDF format. + * @author Marcus D. Hanwell + * + * Differs from the MDL / Mol format in that it includes properties + * + * Currently just supports V2000 of the format. + */ + +class AVOGADROIO_EXPORT SdfFormat : public MdlFormat +{ +public: + SdfFormat(); + ~SdfFormat() override; + + Operations supportedOperations() const override + { + return ReadWrite | MultiMolecule | File | Stream | String; + } + + FileFormat* newInstance() const override { return new SdfFormat; } + std::string identifier() const override { return "Avogadro: SDF"; } + std::string name() const override { return "SDF"; } + std::string description() const override + { + return "Generic format that contains atoms, bonds, positions."; + } + + std::string specificationUrl() const override + { + return "http://help.accelrysonline.com/ulm/onelab/1.0/content/ulm_pdfs/" + "direct/" + "reference/ctfileformats2016.pdf"; + /* for previous (2011) version, see: + https://web.archive.org/web/20180329184712/http://download.accelrys.com/freeware/ctfile-formats/ctfile-formats.zip + */ + } + + std::vector<std::string> fileExtensions() const override; + std::vector<std::string> mimeTypes() const override; + + bool read(std::istream& in, Core::Molecule& molecule) override; + bool write(std::ostream& out, const Core::Molecule& molecule) override; + + // m_writeProperties is inherited from MdlFormat and is enabled by the + // constructor; redeclaring it here would hide the flag write() checks. +}; + +} // namespace Io +} // namespace Avogadro + +#endif // AVOGADRO_IO_SDFFORMAT_H diff --git a/avogadro/qtplugins/forcefield/scriptenergy.cpp b/avogadro/qtplugins/forcefield/scriptenergy.cpp index 6d9925caf3..293d1cac02 100644 --- 
a/avogadro/qtplugins/forcefield/scriptenergy.cpp +++ b/avogadro/qtplugins/forcefield/scriptenergy.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -191,10 +192,12 @@ ScriptEnergy::Format ScriptEnergy::stringToFormat(const std::string& str) return Cjson; else if (str == "cml") return Cml; - else if (str == "mdl" || str == "mol" || str == "sdf" || str == "sd") + else if (str == "mdl" || str == "mol") return Mdl; else if (str == "pdb") return Pdb; + else if (str == "sdf") + return Sdf; else if (str == "xyz") return Xyz; return NotUsed; @@ -211,6 +214,8 @@ Io::FileFormat* ScriptEnergy::createFileFormat(ScriptEnergy::Format fmt) return new Io::MdlFormat; case Pdb: return new Io::PdbFormat; + case Sdf: + return new Io::SdfFormat; case Xyz: return new Io::XyzFormat; default: diff --git a/avogadro/qtplugins/forcefield/scriptenergy.h b/avogadro/qtplugins/forcefield/scriptenergy.h index 32783d2ff5..9aab7bf0a1 100644 --- a/avogadro/qtplugins/forcefield/scriptenergy.h +++ b/avogadro/qtplugins/forcefield/scriptenergy.h @@ -39,8 +39,9 @@ class ScriptEnergy : public Avogadro::Calc::EnergyCalculator NotUsed, Cjson, Cml, - Mdl, // sdf + Mdl, Pdb, + Sdf, Xyz }; diff --git a/avogadro/qtplugins/scriptcharges/scriptchargemodel.cpp b/avogadro/qtplugins/scriptcharges/scriptchargemodel.cpp index e28d239d87..92ab999e2f 100644 --- a/avogadro/qtplugins/scriptcharges/scriptchargemodel.cpp +++ b/avogadro/qtplugins/scriptcharges/scriptchargemodel.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -199,8 +200,10 @@ ScriptChargeModel::Format ScriptChargeModel::stringToFormat( return Cjson; else if (str == "cml") return Cml; - else if (str == "mdl" || str == "mol" || str == "sdf" || str == "sd") + else if (str == "mdl" || str == "mol") return Mdl; + else if (str == "sdf") + return Sdf; else if (str == "pdb") return Pdb; else if (str == "xyz") @@ -220,6 +223,8 @@ Io::FileFormat* ScriptChargeModel::createFileFormat( return new 
Io::MdlFormat; case Pdb: return new Io::PdbFormat; + case Sdf: + return new Io::SdfFormat; case Xyz: return new Io::XyzFormat; default: diff --git a/avogadro/qtplugins/scriptcharges/scriptchargemodel.h b/avogadro/qtplugins/scriptcharges/scriptchargemodel.h index 9f5abb6739..290c2f10bb 100644 --- a/avogadro/qtplugins/scriptcharges/scriptchargemodel.h +++ b/avogadro/qtplugins/scriptcharges/scriptchargemodel.h @@ -35,8 +35,9 @@ class ScriptChargeModel : public Avogadro::Calc::ChargeModel NotUsed, Cjson, Cml, - Mdl, // sdf + Mdl, Pdb, + Sdf, Xyz }; diff --git a/avogadro/qtplugins/scriptfileformats/fileformatscript.cpp b/avogadro/qtplugins/scriptfileformats/fileformatscript.cpp index 433dd0cead..816862e306 100644 --- a/avogadro/qtplugins/scriptfileformats/fileformatscript.cpp +++ b/avogadro/qtplugins/scriptfileformats/fileformatscript.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -130,10 +131,12 @@ FileFormatScript::Format FileFormatScript::stringToFormat( return Cjson; else if (str == "cml") return Cml; - else if (str == "mdl" || str == "mol" || str == "sdf" || str == "sd") + else if (str == "mdl" || str == "mol") return Mdl; else if (str == "pdb") return Pdb; + else if (str == "sdf") + return Sdf; else if (str == "xyz") return Xyz; return NotUsed; @@ -150,6 +153,8 @@ Io::FileFormat* FileFormatScript::createFileFormat(FileFormatScript::Format fmt) return new Io::MdlFormat; case Pdb: return new Io::PdbFormat; + case Sdf: + return new Io::SdfFormat; case Xyz: return new Io::XyzFormat; default: @@ -217,7 +222,7 @@ void FileFormatScript::readMetaData() // validate operations: Operations operationsTmp = Io::FileFormat::None; - for (auto & it : opStringsTmp) { + for (auto& it : opStringsTmp) { if (it == "read") operationsTmp |= Io::FileFormat::Read; else if (it == "write") @@ -357,4 +362,4 @@ bool FileFormatScript::parseStringArray(const QJsonObject& ob, return !array.empty(); } -} // namespace Avogadro +} // namespace Avogadro::QtPlugins 
diff --git a/avogadro/qtplugins/scriptfileformats/fileformatscript.h b/avogadro/qtplugins/scriptfileformats/fileformatscript.h index 620de7c412..cd42c54328 100644 --- a/avogadro/qtplugins/scriptfileformats/fileformatscript.h +++ b/avogadro/qtplugins/scriptfileformats/fileformatscript.h @@ -61,7 +61,8 @@ namespace QtPlugins { * `"cjson"`, or `"xyz"`. See the `--write` documentation for more detail. * - `outputFormat` indicates the format that the script can convert to from the * implemented format by the `--read` command. Allowed values are `"cml"`, - * `"cjson"`, `"sdf"`, `"pdb"` or `"xyz"`. See the `--read` documentation for more detail. + * `"cjson"`, `"sdf"`, `"pdb"` or `"xyz"`. See the `--read` documentation for + * more detail. * - `operations` specifies the scripts capabilities. The array should contain * `"read"` if the script implements the `--read` option, and/or `"write"` if * `--write` is available. @@ -120,6 +121,7 @@ class FileFormatScript : public Avogadro::Io::FileFormat Cml, Mdl, Pdb, + Sdf, Xyz };