Skip to content

Commit

Permalink
Much more robust PDB atom symbol parsing.
Browse files Browse the repository at this point in the history
  • Loading branch information
johnmay committed Oct 19, 2016
1 parent 6c8f009 commit f3c112b
Showing 1 changed file with 51 additions and 5 deletions.
56 changes: 51 additions & 5 deletions storage/pdb/src/main/java/org/openscience/cdk/io/PDBReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.config.AtomTypeFactory;
import org.openscience.cdk.config.Elements;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.exception.NoSuchAtomTypeException;
import org.openscience.cdk.graph.rebond.RebondTool;
Expand Down Expand Up @@ -549,15 +550,60 @@ private boolean createBondsWithRebondTool(IAtomContainer molecule) {
/**
 * Tests whether a character is an ASCII upper-case letter.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} lies in the range 'A' to 'Z' inclusive
 */
private static boolean isUpper(char c) {
    // negated out-of-range test, equivalent to 'A' <= c <= 'Z'
    return !(c < 'A' || c > 'Z');
}
/**
 * Tests whether a character is an ASCII lower-case letter.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} lies in the range 'a' to 'z' inclusive
 */
private static boolean isLower(char c) {
    if (c < 'a') {
        return false;
    }
    return c <= 'z';
}
/**
 * Tests whether a character is an ASCII decimal digit.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} lies in the range '0' to '9' inclusive
 */
private static boolean isDigit(char c) {
    // distance from '0'; a digit is at most 9 code points away
    final int offset = c - '0';
    return offset >= 0 && offset <= 9;
}

/**
 * Determines the element symbol of an atom from a PDB record line.
 *
 * <p>The characters at indices 76 and 77 (end of the line) are tried first.
 * If they do not yield a symbol, the characters at indices 12 to 14 (the
 * atom name) are inspected instead. Every index is checked against the
 * length of the line before it is read, so short or truncated lines never
 * cause an exception.
 *
 * @param str the record line to inspect, may be {@code null}
 * @return the symbol (one upper-case letter, optionally followed by one
 *         lower-case letter), or {@code null} if {@code str} is
 *         {@code null}, empty, or no symbol could be determined
 */
private String parseAtomSymbol(String str) {

    if (str == null || str.isEmpty())
        return null;

    final int len = str.length();

    StringBuilder sym = new StringBuilder();

    // try grabbing from end of line
    if (len > 76 && isUpper(str.charAt(76))) {
        sym.append(str.charAt(76));
        // the second letter may appear in either case; always store it lower-case
        if (len > 77 && (isUpper(str.charAt(77)) || isLower(str.charAt(77))))
            sym.append(Character.toLowerCase(str.charAt(77)));
    } else if (len > 76 && str.charAt(76) == ' ') {
        // single-letter symbol placed in the second of the two columns
        if (len > 77 && isUpper(str.charAt(77)))
            sym.append(str.charAt(77));
    }

    if (sym.length() > 0)
        return sym.toString();

    // try getting from PDB atom name
    if (len > 13 && isUpper(str.charAt(13))) {
        if (str.charAt(12) == ' ') {
            sym.append(str.charAt(13));
            // index 14 is not covered by the len > 13 check above
            if (len > 14 && isLower(str.charAt(14)))
                sym.append(str.charAt(14));
        } else if (isUpper(str.charAt(12))) {
            if (str.charAt(0) == 'A' && str.charAt(12) == 'H') {
                sym.append('H'); // ATOM record H is always H
            } else {
                sym.append(str.charAt(12));
                sym.append(Character.toLowerCase(str.charAt(13)));
            }
        } else if (isDigit(str.charAt(12))) {
            // leading digit in the name, the symbol is the letter that follows
            sym.append(str.charAt(13));
        }
    }

    if (sym.length() > 0)
        return sym.toString();

    return null;
}

/**
Expand Down Expand Up @@ -591,7 +637,7 @@ private PDBAtom readAtom(String cLine, int lineLength) throws CDKException {
boolean isHetatm = cLine.substring(0, 6).equals("HETATM");
String atomName = cLine.substring(12, 16).trim();
String resName = cLine.substring(17, 20).trim();
String symbol = parseAtomSymbol(atomName);
String symbol = parseAtomSymbol(cLine);

if (symbol == null)
handleError("Cannot parse symbol from " + atomName);
Expand Down

0 comments on commit f3c112b

Please sign in to comment.