From cc4da2b034be191ca17972c2aa4aa9d398cc2ca2 Mon Sep 17 00:00:00 2001 From: John May Date: Mon, 6 Mar 2017 15:25:44 +0000 Subject: [PATCH] Work around limitation of absolute SMILES with unknown elements. We encode the unknown element as Rf (Rutherfordium, the highest element supported by InChI v1.3, the version the current JNI InChI uses). Structures that contain Rf can still be encoded, provided they contain no unknown atoms. --- .../cdk/smiles/AbsoluteSMILESTest.java | 16 ++++++++++++ .../cdk/smiles/SmilesGenerator.java | 26 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/storage/inchi/src/test/java/org/openscience/cdk/smiles/AbsoluteSMILESTest.java b/storage/inchi/src/test/java/org/openscience/cdk/smiles/AbsoluteSMILESTest.java index 4d7b5cc28a..f6e5a3ce24 100644 --- a/storage/inchi/src/test/java/org/openscience/cdk/smiles/AbsoluteSMILESTest.java +++ b/storage/inchi/src/test/java/org/openscience/cdk/smiles/AbsoluteSMILESTest.java @@ -27,6 +27,7 @@ import com.google.common.base.Joiner; import org.junit.Assert; import org.junit.Test; +import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.silent.SilentChemObjectBuilder; @@ -112,6 +113,21 @@ public void dbStereoCanonGeneration() throws Exception { cansmi.create(smipar.parseSmiles(cansmi.create(mol)))); } + @Test + public void smilesWithUnknownElem() throws Exception { + test("*CC", "CC*"); + } + + @Test + public void rfElement() throws Exception { + test("[Rf]"); + } + + @Test(expected = CDKException.class) + public void problematic() throws Exception { + test("*[Rf]"); + } + static void test(String... 
inputs) throws Exception { SmilesParser sp = new SmilesParser(SilentChemObjectBuilder.getInstance()); diff --git a/storage/smiles/src/main/java/org/openscience/cdk/smiles/SmilesGenerator.java b/storage/smiles/src/main/java/org/openscience/cdk/smiles/SmilesGenerator.java index e701ddc08f..50ab2b673d 100644 --- a/storage/smiles/src/main/java/org/openscience/cdk/smiles/SmilesGenerator.java +++ b/storage/smiles/src/main/java/org/openscience/cdk/smiles/SmilesGenerator.java @@ -23,6 +23,7 @@ package org.openscience.cdk.smiles; import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.config.Elements; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.graph.ConnectedComponents; import org.openscience.cdk.graph.GraphUtil; @@ -47,6 +48,7 @@ import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.IdentityHashMap; @@ -690,6 +692,13 @@ private static long[] inchiNumbers(IAtomContainer container) throws CDKException // class each time String cname = "org.openscience.cdk.graph.invariant.InChINumbersTools"; String mname = "getUSmilesNumbers"; + + List rgrps = getRgrps(container, Elements.Rutherfordium); + for (IAtom rgrp : rgrps) { + rgrp.setAtomicNumber(Elements.Rutherfordium.number()); + rgrp.setSymbol(Elements.Rutherfordium.symbol()); + } + try { Class c = Class.forName(cname); Method method = c.getDeclaredMethod("getUSmilesNumbers", IAtomContainer.class); @@ -703,7 +712,24 @@ private static long[] inchiNumbers(IAtomContainer container) throws CDKException throw new CDKException("An InChI could not be generated and used to canonise SMILES: " + e.getMessage(), e); } catch (IllegalAccessException e) { throw new CDKException("Could not access method to obtain InChI numbers."); + } finally { + for (IAtom rgrp : rgrps) { + rgrp.setAtomicNumber(Elements.Unknown.number()); + rgrp.setSymbol("*"); + } + } + } + + 
private static List getRgrps(IAtomContainer container, Elements reversed) { + List res = new ArrayList<>(); + for (IAtom atom : container.atoms()) { + if (atom.getAtomicNumber() == 0) { + res.add(atom); + } else if (atom.getAtomicNumber() == reversed.number()) { + return Collections.emptyList(); + } } + return res; } // utility safety check to guard against invalid state