Skip to content

Commit

Permalink
Allow -RecMet and -FixedH options.
Browse files Browse the repository at this point in the history
  • Loading branch information
johnmay committed Feb 5, 2019
1 parent def66e1 commit 1cd8309
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 59 deletions.
35 changes: 18 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,24 @@ the end of an InChI. Currently the following geometries are supported:
* Trigonal Bipyramidal: ``/ma<atom>tb<order>`` where order is 1-20
* Octahedral: ``/ma<atom>o<order>`` where order is 1-30

Daylight used the term 'chiral order', I prefer simply 'order' as these
Daylight used the term 'chiral order', I prefer 'order' as these
geometries are not necessarily chiral.

The order specifies a permutation index and uses the same coding
scheme as SMILES (see [relevant blog post](http://timvdm.blogspot.com/2010/09/smiles-stereochemistry-enigma.html)).
It works because we need to be able to specify any order of the neighbors around the central (or
'focus') atom. To specify any ordering for octahedral we have 6 neighbors so
there are 720 (6 factorial) possible ways to order them. However there are 24
symmetries and so we only need 720/24 = 30 possible orders. For trigonal
bipyramidal, 120 (i.e. 5 factorial) ways to order but 6 symmetries, 120/6 = 20
possible orders.

For each of these geometry we use a table to look up the
ordering we have ended up with from the canonical labelling. When there are
symmetries within the neighbors we choose the lowest possible ordering.
Currently such symmetries are broken by enumeration but in practice a
backtracking canonical labelling algorithm (such as that used by the InChI) can
take care of this step.
It works because we need to be able to specify any order of the neighbors around
the central (or 'focus') atom. To specify any ordering for octahedral we have 6
neighbors so there are 720 (6 factorial) possible ways to order them. However
there are 24 symmetries and so we only need 720/24 = 30 possible orders. For
trigonal bipyramidal there are 120 (i.e. 5 factorial) ways to order but 6
symmetries, so we only need 120/6 = 20 possible orders.

For each of these geometries we use a table to look up the
ordering we have ended up with from the InChI atom numbers (parsed from the
AuxInfo). When there are symmetries within the neighbors we choose the lowest
possible ordering. Currently such symmetries are broken by re-enumeration but in
practice a backtracking canonical labelling algorithm (such as that used by the
InChI) can take care of this step.

### Examples

Expand Down Expand Up @@ -157,19 +157,20 @@ two equatorial, or one in the axis and one equatorial.

### Usage

A command line application is provided (download: [inchi-ma.jar](https://github.com/johnmay/inchi-ma/releases/latest)) that can provide InChIs for a SMILES or 3D SDfile:
A command line application is provided (available: [inchi-ma.jar](https://github.com/johnmay/inchi-ma/releases/latest)) that can provide InChIs with an ``/ma`` layer for a SMILES or 3D SDfile:

```
$ java -jar inchi-ma.jar input.smi
$ java -jar inchi-ma.jar input.sdf
```

Some example inputs are provided in the ``examples/`` directory.

### Limitations

Currently only constitutionally different neighbors are handled. The system used
can be used to encode geometries such as lambda/delta Fe(ox<sub>3</sub>) if
there was tighter integration with the main canonically labelling algorithm.
here can also be used to encode geometries such as lambda/delta Fe(ox<sub>3</sub>)
if there were tighter integration with the canonical labelling procedure.

### Building

Expand Down
10 changes: 6 additions & 4 deletions src/main/java/com/nextmovesoftware/inchi/InChIMetalArch.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package com.nextmovesoftware.inchi;

import com.google.common.collect.FluentIterable;
import net.sf.jniinchi.INCHI_OPTION;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.exception.InvalidSmilesException;
import org.openscience.cdk.graph.GraphUtil;
Expand All @@ -25,6 +26,7 @@
import org.openscience.cdk.stereo.TrigonalBipyramidal;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
Expand Down Expand Up @@ -135,9 +137,9 @@ public int compare(IAtom o1, IAtom o2) {
return findByBacktrack(table, perm, 100);
}

public static String toInChI(IAtomContainer mol) {
public static String toInChI(IAtomContainer mol, List<INCHI_OPTION> opts) {
try {
InChIGenerator inchigen = inchifact.getInChIGenerator(mol, "");
InChIGenerator inchigen = inchifact.getInChIGenerator(mol, opts);
String inchi = inchigen.getInchi();
String auxinfo = inchigen.getAuxInfo();

Expand Down Expand Up @@ -208,9 +210,9 @@ else if (!(b.getFocus() instanceof IAtom))
}
}

public static String toInChI(String smi) {
public static String toInChI(String smi, List<INCHI_OPTION> opts) {
try {
return toInChI(smipar.parseSmiles(smi));
return toInChI(smipar.parseSmiles(smi), opts);
} catch (InvalidSmilesException e) {
System.err.println("ERROR: Bad SMILES " + e.getMessage());
return null;
Expand Down
77 changes: 43 additions & 34 deletions src/main/java/com/nextmovesoftware/inchi/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

package com.nextmovesoftware.inchi;

import net.sf.jniinchi.INCHI_OPTION;
import org.openscience.cdk.exception.InvalidSmilesException;
import org.openscience.cdk.geometry.GeometryUtil;
import org.openscience.cdk.interfaces.IAtomContainer;
Expand All @@ -27,18 +28,21 @@
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

public class Main {

private static final IChemObjectBuilder builder = SilentChemObjectBuilder.getInstance();
private static final SmilesParser smipar = new SmilesParser(builder);

private static final int SMIFMT = 1;
private static final int SDFFMT = 2;
private static InputStream in;
private static OutputStream out;
private static int fmt = SMIFMT;
private static final int SMIFMT = 1;
private static final int SDFFMT = 2;
private static InputStream in;
private static OutputStream out;
private static int fmt = SMIFMT;
private static List<INCHI_OPTION> options = new ArrayList<>();

private static int determineFormat(String val) {
switch (val.toLowerCase(Locale.ROOT)) {
Expand All @@ -60,7 +64,7 @@ private static int determineFormat(String val) {

private static int determineFormatFromFilename(String fname) {
int idx = fname.lastIndexOf('.');
return determineFormat(fname.substring(idx+1));
return determineFormat(fname.substring(idx + 1));
}

private static boolean processCommandLine(String[] args) {
Expand All @@ -77,33 +81,38 @@ else if (i + 1 < args.length)
return false;
}
fmt = determineFormat(val);
}
switch (j++) {
case 0:
if (args[i].equals("-"))
in = System.in;
else {
try {
in = new FileInputStream(args[i]);
} catch (FileNotFoundException ex) {
System.err.println("File not found: " + args[i]);
return false;
} else if (args[i].equals("-RecMet")) {
options.add(INCHI_OPTION.RecMet);
} else if (args[i].equals("-FixedH")) {
options.add(INCHI_OPTION.FixedH);
} else {
switch (j++) {
case 0:
if (args[i].equals("-"))
in = System.in;
else {
try {
in = new FileInputStream(args[i]);
} catch (FileNotFoundException ex) {
System.err.println("File not found: " + args[i]);
return false;
}
fmt = determineFormatFromFilename(args[i]);
}
fmt = determineFormatFromFilename(args[i]);
}
break;
case 1:
if (args[i].equals("-"))
out = System.out;
else {
try {
out = new FileOutputStream(args[i]);
} catch (FileNotFoundException ex) {
System.err.println("File not found: " + args[i]);
return false;
break;
case 1:
if (args[i].equals("-"))
out = System.out;
else {
try {
out = new FileOutputStream(args[i]);
} catch (FileNotFoundException ex) {
System.err.println("File not found: " + args[i]);
return false;
}
}
}
break;
break;
}
}
}
if (out == null)
Expand All @@ -115,7 +124,7 @@ private static void displayUsage() {
System.err.println("Copyright (c) 2019 NextMove Software");
System.err.println("InChI Metal Arch\n");
System.err.println("Usage:");
System.err.println(" java -jar inchi-ma.jar [-fmt {SMI|SDF}] <infile> [<outfile>]\n");
System.err.println(" java -jar inchi-ma.jar [-fmt {SMI|SDF} -RecMet -FixedH] <infile> [<outfile>]\n");
System.err.println("Examples:");
System.err.println(" Processing a file of SMILES or SDfile");
System.err.println(" java -jar inchi-ma.jar <input.smi>");
Expand All @@ -133,7 +142,7 @@ private static void processSmilesFile(BufferedWriter wtr,
while ((line = brdr.readLine()) != null) {
try {
IAtomContainer mol = smipar.parseSmiles(line);
String inchi = InChIMetalArch.toInChI(mol);
String inchi = InChIMetalArch.toInChI(mol, options);
if (inchi != null)
wtr.write(inchi);
if (mol.getTitle() != null) {
Expand All @@ -159,7 +168,7 @@ private static void processSdfile(BufferedWriter wtr,

PerceiveFrom3d.perceive(mol);

String inchi = InChIMetalArch.toInChI(mol);
String inchi = InChIMetalArch.toInChI(mol, options);
if (inchi != null)
wtr.write(inchi);
if (mol.getTitle() != null) {
Expand Down
14 changes: 10 additions & 4 deletions src/test/java/InChIMetalArchTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,29 @@
*/

import com.nextmovesoftware.inchi.InChIMetalArch;
import net.sf.jniinchi.INCHI_OPTION;
import org.hamcrest.CoreMatchers;
import org.junit.Assert;
import org.junit.Test;

import java.net.URLEncoder;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class InChIMetalArchTest {

private final List<INCHI_OPTION> EMPTY_OPTS = Collections.emptyList();

@Test
public void cisTransPlatin() {
Set<String> inchis = new HashSet<>();
// generate the 3 possible square planar configurations (in the shape of
// U, 4, or Z), there should be two unique: one cis- and one trans-
for (int i = 1; i <= 3; i++) {
String smi = "Cl[Pt@SP" + i + "](Cl)([NH3])[NH3]";
inchis.add(InChIMetalArch.toInChI(smi));
inchis.add(InChIMetalArch.toInChI(smi, EMPTY_OPTS));
}
Assert.assertThat(inchis.size(), CoreMatchers.is(2));
}
Expand All @@ -32,7 +38,7 @@ public void cisTransCoNO2Cl() {
// generate the 30 possible octahedral configurations, there should be two unique
for (int i = 1; i <= 30; i++) {
String smi = "Cl[Co@OH" + i + "](Cl)(Cl)(N(=O)(=O))(Cl)N(=O)=O";
inchis.add(InChIMetalArch.toInChI(smi));
inchis.add(InChIMetalArch.toInChI(smi, EMPTY_OPTS));
}
Assert.assertThat(inchis.size(), CoreMatchers.is(2));
}
Expand All @@ -44,7 +50,7 @@ public void facMerCoNO2Cl() {
// one fac- and one mer-
for (int i = 1; i <= 30; i++) {
String smi = "Cl[Co@OH" + i + "](Cl)(Cl)(N(=O)(=O))(N(=O)=O)N(=O)=O";
inchis.add(InChIMetalArch.toInChI(smi));
inchis.add(InChIMetalArch.toInChI(smi, EMPTY_OPTS));
}
Assert.assertThat(inchis.size(), CoreMatchers.is(2));
}
Expand All @@ -56,7 +62,7 @@ public void tbpy2and4() {
// generate the 20 possible configurations
for (int i = 1; i <= 20; i++) {
String smi = "N[Co@TB" + i + "](Cl)(Cl)(Cl)N";
inchis.add(InChIMetalArch.toInChI(smi));
inchis.add(InChIMetalArch.toInChI(smi, EMPTY_OPTS));
}
Assert.assertThat(inchis.size(), CoreMatchers.is(3));
}
Expand Down

0 comments on commit 1cd8309

Please sign in to comment.